xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0bdbc534f55137a4724bb01c5722bc8c0492b9b5)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*0bdbc534SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.92 1997/12/06 00:08:36 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
143b2fbd54SBarry Smith 
15537820f0SBarry Smith /*
16537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay    length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
215615d1e5SSatish Balay #undef __FUNC__
225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2457b952d6SSatish Balay {
2557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2857b952d6SSatish Balay 
29d64ed03dSBarry Smith   PetscFunctionBegin;
30758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3257b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
343a40ed3dSBarry Smith   PetscFunctionReturn(0);
3557b952d6SSatish Balay }
3657b952d6SSatish Balay 
3780c1aa95SSatish Balay #define CHUNKSIZE  10
3880c1aa95SSatish Balay 
39f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4080c1aa95SSatish Balay { \
4180c1aa95SSatish Balay  \
4280c1aa95SSatish Balay     brow = row/bs;  \
4380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4580c1aa95SSatish Balay       bcol = col/bs; \
4680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
47ab26458aSBarry Smith       low = 0; high = nrow; \
48ab26458aSBarry Smith       while (high-low > 3) { \
49ab26458aSBarry Smith         t = (low+high)/2; \
50ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
51ab26458aSBarry Smith         else              low  = t; \
52ab26458aSBarry Smith       } \
53ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
58eada6651SSatish Balay           else                    *bap  = value;  \
59ac7a638eSSatish Balay           goto a_noinsert; \
6080c1aa95SSatish Balay         } \
6180c1aa95SSatish Balay       } \
6289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
63a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
6480c1aa95SSatish Balay       if (nrow >= rmax) { \
6580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6680c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6780c1aa95SSatish Balay         Scalar *new_a; \
6880c1aa95SSatish Balay  \
69a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7089280ab3SLois Curfman McInnes  \
7180c1aa95SSatish Balay         /* malloc new storage space */ \
7280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7680c1aa95SSatish Balay  \
7780c1aa95SSatish Balay         /* copy over old data into new slots */ \
7880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8380c1aa95SSatish Balay                                                            len*sizeof(int)); \
8480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8880c1aa95SSatish Balay         /* free up old matrix storage */ \
8980c1aa95SSatish Balay         PetscFree(a->a);  \
9080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9280c1aa95SSatish Balay         a->singlemalloc = 1; \
9380c1aa95SSatish Balay  \
9480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9880c1aa95SSatish Balay         a->reallocs++; \
9980c1aa95SSatish Balay         a->nz++; \
10080c1aa95SSatish Balay       } \
10180c1aa95SSatish Balay       N = nrow++ - 1;  \
10280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
110ac7a638eSSatish Balay       a_noinsert:; \
11180c1aa95SSatish Balay     ailen[brow] = nrow; \
11280c1aa95SSatish Balay }
11357b952d6SSatish Balay 
114ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115ac7a638eSSatish Balay { \
116ac7a638eSSatish Balay  \
117ac7a638eSSatish Balay     brow = row/bs;  \
118ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
120ac7a638eSSatish Balay       bcol = col/bs; \
121ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
122ac7a638eSSatish Balay       low = 0; high = nrow; \
123ac7a638eSSatish Balay       while (high-low > 3) { \
124ac7a638eSSatish Balay         t = (low+high)/2; \
125ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
126ac7a638eSSatish Balay         else              low  = t; \
127ac7a638eSSatish Balay       } \
128ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
129ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
130ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
131ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
133ac7a638eSSatish Balay           else                    *bap  = value;  \
134ac7a638eSSatish Balay           goto b_noinsert; \
135ac7a638eSSatish Balay         } \
136ac7a638eSSatish Balay       } \
13789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
138a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139ac7a638eSSatish Balay       if (nrow >= rmax) { \
140ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14174c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142ac7a638eSSatish Balay         Scalar *new_a; \
143ac7a638eSSatish Balay  \
144a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
14589280ab3SLois Curfman McInnes  \
146ac7a638eSSatish Balay         /* malloc new storage space */ \
14774c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
150ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
151ac7a638eSSatish Balay  \
152ac7a638eSSatish Balay         /* copy over old data into new slots */ \
153ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15474c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158ac7a638eSSatish Balay                                                            len*sizeof(int)); \
159ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163ac7a638eSSatish Balay         /* free up old matrix storage */ \
16474c639caSSatish Balay         PetscFree(b->a);  \
16574c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16774c639caSSatish Balay         b->singlemalloc = 1; \
168ac7a638eSSatish Balay  \
169ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17174c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17374c639caSSatish Balay         b->reallocs++; \
17474c639caSSatish Balay         b->nz++; \
175ac7a638eSSatish Balay       } \
176ac7a638eSSatish Balay       N = nrow++ - 1;  \
177ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
178ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
179ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
180ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
184ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
185ac7a638eSSatish Balay       b_noinsert:; \
186ac7a638eSSatish Balay     bilen[brow] = nrow; \
187ac7a638eSSatish Balay }
188ac7a638eSSatish Balay 
1895615d1e5SSatish Balay #undef __FUNC__
1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   Scalar      value;
1954fa0d573SSatish Balay   int         ierr,i,j,row,col;
1964fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1974fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1984fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19957b952d6SSatish Balay 
200eada6651SSatish Balay   /* Some Variables required in the macro */
20180c1aa95SSatish Balay   Mat         A = baij->A;
20280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204ac7a638eSSatish Balay   Scalar      *aa=a->a;
205ac7a638eSSatish Balay 
206ac7a638eSSatish Balay   Mat         B = baij->B;
207ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209ac7a638eSSatish Balay   Scalar      *ba=b->a;
210ac7a638eSSatish Balay 
211ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
213ac7a638eSSatish Balay   Scalar      *ap,*bap;
21480c1aa95SSatish Balay 
215d64ed03dSBarry Smith   PetscFunctionBegin;
21657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
218a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220639f9d9dSBarry Smith #endif
22157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22257b952d6SSatish Balay       row = im[i] - rstart_orig;
22357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22557b952d6SSatish Balay           col = in[j] - cstart_orig;
22657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22957b952d6SSatish Balay         }
2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
231a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233639f9d9dSBarry Smith #endif
23457b952d6SSatish Balay         else {
23557b952d6SSatish Balay           if (mat->was_assembled) {
236905e6a2fSBarry Smith             if (!baij->colmap) {
237905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238905e6a2fSBarry Smith             }
239905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
24057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24257b952d6SSatish Balay               col =  in[j];
2439bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2449bf004c3SSatish Balay               B = baij->B;
2459bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2469bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2479bf004c3SSatish Balay               ba=b->a;
24857b952d6SSatish Balay             }
249d64ed03dSBarry Smith           } else col = in[j];
25057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
255d64ed03dSBarry Smith     } else {
25690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258d64ed03dSBarry Smith       } else {
25990f02eecSBarry Smith         if (!baij->donotstash) {
26057b952d6SSatish Balay           row = im[i];
26157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26357b952d6SSatish Balay           }
26457b952d6SSatish Balay         }
26557b952d6SSatish Balay       }
26657b952d6SSatish Balay     }
26790f02eecSBarry Smith   }
2683a40ed3dSBarry Smith   PetscFunctionReturn(0);
26957b952d6SSatish Balay }
27057b952d6SSatish Balay 
271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272ab26458aSBarry Smith #undef __FUNC__
273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275ab26458aSBarry Smith {
276ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
278abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
279ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
281ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282ab26458aSBarry Smith 
28330793edcSSatish Balay   if(!barray) {
28447513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
287ab26458aSBarry Smith   if (roworiented) {
288ab26458aSBarry Smith     stepval = (n-1)*bs;
289ab26458aSBarry Smith   } else {
290ab26458aSBarry Smith     stepval = (m-1)*bs;
291ab26458aSBarry Smith   }
292ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
294a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296ab26458aSBarry Smith #endif
297ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
298ab26458aSBarry Smith       row = im[i] - rstart;
299ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30215b57d14SSatish Balay           barray = v + i*bs2;
30315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30415b57d14SSatish Balay           barray = v + j*bs2;
30515b57d14SSatish Balay         } else { /* Here a copy is required */
306ab26458aSBarry Smith           if (roworiented) {
307ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
308ab26458aSBarry Smith           } else {
309ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
310abef11f7SSatish Balay           }
31147513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31247513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31330793edcSSatish Balay               *barray++  = *value++;
31447513183SBarry Smith             }
31547513183SBarry Smith           }
31630793edcSSatish Balay           barray -=bs2;
31715b57d14SSatish Balay         }
318abef11f7SSatish Balay 
319abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
320abef11f7SSatish Balay           col  = in[j] - cstart;
32130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322ab26458aSBarry Smith         }
3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
324a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326ab26458aSBarry Smith #endif
327ab26458aSBarry Smith         else {
328ab26458aSBarry Smith           if (mat->was_assembled) {
329ab26458aSBarry Smith             if (!baij->colmap) {
330ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331ab26458aSBarry Smith             }
332a5eb4965SSatish Balay 
3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
334a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335a5eb4965SSatish Balay #endif
336a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
337ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339ab26458aSBarry Smith               col =  in[j];
340ab26458aSBarry Smith             }
341ab26458aSBarry Smith           }
342ab26458aSBarry Smith           else col = in[j];
34330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344ab26458aSBarry Smith         }
345ab26458aSBarry Smith       }
346d64ed03dSBarry Smith     } else {
347ab26458aSBarry Smith       if (!baij->donotstash) {
348ab26458aSBarry Smith         if (roworiented ) {
349abef11f7SSatish Balay           row   = im[i]*bs;
350abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
351abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
352abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
353abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355abef11f7SSatish Balay               }
356ab26458aSBarry Smith             }
357ab26458aSBarry Smith           }
358d64ed03dSBarry Smith         } else {
359ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
360abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
361abef11f7SSatish Balay             col   = in[j]*bs;
362abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
363abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365ab26458aSBarry Smith               }
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith           }
368abef11f7SSatish Balay         }
369abef11f7SSatish Balay       }
370ab26458aSBarry Smith     }
371ab26458aSBarry Smith   }
3723a40ed3dSBarry Smith   PetscFunctionReturn(0);
373ab26458aSBarry Smith }
374*0bdbc534SSatish Balay #include <math.h>
375*0bdbc534SSatish Balay #define HASH_KEY 0.6180339887
376*0bdbc534SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1)))
377ab26458aSBarry Smith 
3785615d1e5SSatish Balay #undef __FUNC__
379*0bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
380*0bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
381*0bdbc534SSatish Balay {
382*0bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
383*0bdbc534SSatish Balay   int         ierr,i,j,row,col;
384*0bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
385*0bdbc534SSatish Balay   int         rend_orig=baij->rend_bs;
386*0bdbc534SSatish Balay 
387*0bdbc534SSatish Balay   int         h1,key,size=baij->ht_size,k,bs=baij->bs;
388*0bdbc534SSatish Balay   double      * HT  = baij->ht;
389*0bdbc534SSatish Balay   Scalar      ** HD = (Scalar **)(HT + size),value;
390*0bdbc534SSatish Balay 
391*0bdbc534SSatish Balay 
392*0bdbc534SSatish Balay   PetscFunctionBegin;
393*0bdbc534SSatish Balay 
394*0bdbc534SSatish Balay   if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
395*0bdbc534SSatish Balay 
396*0bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
397*0bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
398*0bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
399*0bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
400*0bdbc534SSatish Balay #endif
401*0bdbc534SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
402*0bdbc534SSatish Balay       row = im[i];
403*0bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
404*0bdbc534SSatish Balay         col = in[j];
405*0bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
406*0bdbc534SSatish Balay         /* Look up into the Hash Table */
407*0bdbc534SSatish Balay         key = (row/bs)*baij->Nbs+(col/bs)+1;
408*0bdbc534SSatish Balay         h1  = HASH1(size,key);
409*0bdbc534SSatish Balay 
410*0bdbc534SSatish Balay         for ( k=0; k<size; k++ ){
411*0bdbc534SSatish Balay           if (HT[(h1+k)%size] == key) {
412*0bdbc534SSatish Balay             if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value;
413*0bdbc534SSatish Balay             else                    *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs)  = value;
414*0bdbc534SSatish Balay             break;
415*0bdbc534SSatish Balay           }
416*0bdbc534SSatish Balay         }
417*0bdbc534SSatish Balay         if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found");
418*0bdbc534SSatish Balay       }
419*0bdbc534SSatish Balay     } else {
420*0bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
421*0bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
422*0bdbc534SSatish Balay       } else {
423*0bdbc534SSatish Balay         if (!baij->donotstash) {
424*0bdbc534SSatish Balay           row = im[i];
425*0bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
426*0bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
427*0bdbc534SSatish Balay           }
428*0bdbc534SSatish Balay         }
429*0bdbc534SSatish Balay       }
430*0bdbc534SSatish Balay     }
431*0bdbc534SSatish Balay   }
432*0bdbc534SSatish Balay   PetscFunctionReturn(0);
433*0bdbc534SSatish Balay }
434*0bdbc534SSatish Balay 
435*0bdbc534SSatish Balay #undef __FUNC__
436*0bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
437*0bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
438*0bdbc534SSatish Balay {
439*0bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
440*0bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
441*0bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
442*0bdbc534SSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs;
443*0bdbc534SSatish Balay 
444*0bdbc534SSatish Balay   int         h1,key,size=baij->ht_size;
445*0bdbc534SSatish Balay   double      * HT  = baij->ht;
446*0bdbc534SSatish Balay   Scalar      ** HD = (Scalar **)(HT + size),*value,*baij_a;
447*0bdbc534SSatish Balay 
448*0bdbc534SSatish Balay 
449*0bdbc534SSatish Balay   if (roworiented) {
450*0bdbc534SSatish Balay     stepval = (n-1)*bs;
451*0bdbc534SSatish Balay   } else {
452*0bdbc534SSatish Balay     stepval = (m-1)*bs;
453*0bdbc534SSatish Balay   }
454*0bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
455*0bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
456*0bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
457*0bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
458*0bdbc534SSatish Balay #endif
459*0bdbc534SSatish Balay     if (im[i] >= rstart && im[i] < rend) {
460*0bdbc534SSatish Balay       row = im[i];
461*0bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
462*0bdbc534SSatish Balay         col = in[j];
463*0bdbc534SSatish Balay 
464*0bdbc534SSatish Balay         /* Look up into the Hash Table */
465*0bdbc534SSatish Balay         key = row*baij->Nbs+col+1;
466*0bdbc534SSatish Balay         h1  = HASH1(size,key);
467*0bdbc534SSatish Balay 
468*0bdbc534SSatish Balay         for ( k=0; k<size; k++ ){
469*0bdbc534SSatish Balay           if (HT[(h1+k)%size] == key) {
470*0bdbc534SSatish Balay             baij_a = HD[(h1+k)%size];
471*0bdbc534SSatish Balay 
472*0bdbc534SSatish Balay             if (roworiented) {
473*0bdbc534SSatish Balay               value = v + i*(stepval+bs)*bs + j*bs;
474*0bdbc534SSatish Balay             } else {
475*0bdbc534SSatish Balay               value = v + j*(stepval+bs)*bs + i*bs;
476*0bdbc534SSatish Balay             }
477*0bdbc534SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval ) {
478*0bdbc534SSatish Balay               for (jj=0; jj<bs; jj++ ) {
479*0bdbc534SSatish Balay                 if (addv == ADD_VALUES) *baij_a  += *value;
480*0bdbc534SSatish Balay                 else                    *baij_a   = *value;
481*0bdbc534SSatish Balay                 baij++; value++;
482*0bdbc534SSatish Balay               }
483*0bdbc534SSatish Balay             }
484*0bdbc534SSatish Balay             break;
485*0bdbc534SSatish Balay           }
486*0bdbc534SSatish Balay         }
487*0bdbc534SSatish Balay         if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found");
488*0bdbc534SSatish Balay       }
489*0bdbc534SSatish Balay     } else {
490*0bdbc534SSatish Balay       if (!baij->donotstash) {
491*0bdbc534SSatish Balay         if (roworiented ) {
492*0bdbc534SSatish Balay           row   = im[i]*bs;
493*0bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
494*0bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
495*0bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
496*0bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
497*0bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
498*0bdbc534SSatish Balay               }
499*0bdbc534SSatish Balay             }
500*0bdbc534SSatish Balay           }
501*0bdbc534SSatish Balay         } else {
502*0bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
503*0bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
504*0bdbc534SSatish Balay             col   = in[j]*bs;
505*0bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
506*0bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
507*0bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
508*0bdbc534SSatish Balay               }
509*0bdbc534SSatish Balay             }
510*0bdbc534SSatish Balay           }
511*0bdbc534SSatish Balay         }
512*0bdbc534SSatish Balay       }
513*0bdbc534SSatish Balay     }
514*0bdbc534SSatish Balay   }
515*0bdbc534SSatish Balay   PetscFunctionReturn(0);
516*0bdbc534SSatish Balay }
517*0bdbc534SSatish Balay #undef __FUNC__
5185615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
519ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
520d6de1c52SSatish Balay {
521d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
522d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
523d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
524d6de1c52SSatish Balay 
525d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
526a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
527a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
528d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
529d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
530d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
531a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
532a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
533d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
534d6de1c52SSatish Balay           col = idxn[j] - bscstart;
535d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
536d64ed03dSBarry Smith         } else {
537905e6a2fSBarry Smith           if (!baij->colmap) {
538905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
539905e6a2fSBarry Smith           }
540e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
541dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
542d9d09a02SSatish Balay           else {
543dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
544d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
545d6de1c52SSatish Balay           }
546d6de1c52SSatish Balay         }
547d6de1c52SSatish Balay       }
548d64ed03dSBarry Smith     } else {
549a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
550d6de1c52SSatish Balay     }
551d6de1c52SSatish Balay   }
5523a40ed3dSBarry Smith   PetscFunctionReturn(0);
553d6de1c52SSatish Balay }
554d6de1c52SSatish Balay 
5555615d1e5SSatish Balay #undef __FUNC__
5565615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
557ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
558d6de1c52SSatish Balay {
559d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
560d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
561acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
562d6de1c52SSatish Balay   double     sum = 0.0;
563d6de1c52SSatish Balay   Scalar     *v;
564d6de1c52SSatish Balay 
565d64ed03dSBarry Smith   PetscFunctionBegin;
566d6de1c52SSatish Balay   if (baij->size == 1) {
567d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
568d6de1c52SSatish Balay   } else {
569d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
570d6de1c52SSatish Balay       v = amat->a;
571d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
5723a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
573d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
574d6de1c52SSatish Balay #else
575d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
576d6de1c52SSatish Balay #endif
577d6de1c52SSatish Balay       }
578d6de1c52SSatish Balay       v = bmat->a;
579d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
5803a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
581d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
582d6de1c52SSatish Balay #else
583d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
584d6de1c52SSatish Balay #endif
585d6de1c52SSatish Balay       }
586ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
587d6de1c52SSatish Balay       *norm = sqrt(*norm);
588d64ed03dSBarry Smith     } else {
589e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
590d6de1c52SSatish Balay     }
591d64ed03dSBarry Smith   }
5923a40ed3dSBarry Smith   PetscFunctionReturn(0);
593d6de1c52SSatish Balay }
59457b952d6SSatish Balay 
5955615d1e5SSatish Balay #undef __FUNC__
5965615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
597ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
59857b952d6SSatish Balay {
59957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
60057b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
60157b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
60257b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
60357b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
60457b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
60557b952d6SSatish Balay   InsertMode  addv;
60657b952d6SSatish Balay   Scalar      *rvalues,*svalues;
60757b952d6SSatish Balay 
608d64ed03dSBarry Smith   PetscFunctionBegin;
60957b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
610ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
61157b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
612a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
61357b952d6SSatish Balay   }
614e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
61557b952d6SSatish Balay 
61657b952d6SSatish Balay   /*  first count number of contributors to each processor */
61757b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
61857b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
61957b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
62057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
62157b952d6SSatish Balay     idx = baij->stash.idx[i];
62257b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
62357b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
62457b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
62557b952d6SSatish Balay       }
62657b952d6SSatish Balay     }
62757b952d6SSatish Balay   }
62857b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
62957b952d6SSatish Balay 
63057b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
63157b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
632ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
63357b952d6SSatish Balay   nreceives = work[rank];
634ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
63557b952d6SSatish Balay   nmax      = work[rank];
63657b952d6SSatish Balay   PetscFree(work);
63757b952d6SSatish Balay 
63857b952d6SSatish Balay   /* post receives:
63957b952d6SSatish Balay        1) each message will consist of ordered pairs
64057b952d6SSatish Balay      (global index,value) we store the global index as a double
64157b952d6SSatish Balay      to simplify the message passing.
64257b952d6SSatish Balay        2) since we don't know how long each individual message is we
64357b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
64457b952d6SSatish Balay      this is a lot of wasted space.
64557b952d6SSatish Balay 
64657b952d6SSatish Balay 
64757b952d6SSatish Balay        This could be done better.
64857b952d6SSatish Balay   */
649f8abc2e8SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
650f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
65157b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
652ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
65357b952d6SSatish Balay   }
65457b952d6SSatish Balay 
65557b952d6SSatish Balay   /* do sends:
65657b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
65757b952d6SSatish Balay          the ith processor
65857b952d6SSatish Balay   */
65957b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
660d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
66157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
66257b952d6SSatish Balay   starts[0] = 0;
66357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
66457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
66557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
66657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
66757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
66857b952d6SSatish Balay   }
66957b952d6SSatish Balay   PetscFree(owner);
67057b952d6SSatish Balay   starts[0] = 0;
67157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
67257b952d6SSatish Balay   count = 0;
67357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
67457b952d6SSatish Balay     if (procs[i]) {
675ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
67657b952d6SSatish Balay     }
67757b952d6SSatish Balay   }
67857b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
67957b952d6SSatish Balay 
68057b952d6SSatish Balay   /* Free cache space */
681cd1fa5fbSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n);
68257b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
68357b952d6SSatish Balay 
68457b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
68557b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
68657b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
68757b952d6SSatish Balay   baij->rmax       = nmax;
68857b952d6SSatish Balay 
6893a40ed3dSBarry Smith   PetscFunctionReturn(0);
69057b952d6SSatish Balay }
691bd7f49f5SSatish Balay 
692bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor)
693596b8d2eSBarry Smith {
694596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
695596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
696596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
697*0bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
698*0bdbc534SSatish Balay   int         size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart;
699*0bdbc534SSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col;
700a07cd24cSSatish Balay   double      *HT,key;
701a07cd24cSSatish Balay   extern int  PetscGlobalRank;
702*0bdbc534SSatish Balay   Scalar      **HD;
703a07cd24cSSatish Balay   /*
704a07cd24cSSatish Balay      Scalar      *aa=a->a,*ba=b->a;
705596b8d2eSBarry Smith      static double *HT;
706596b8d2eSBarry Smith      static      int flag=1;
707a07cd24cSSatish Balay      */
708d64ed03dSBarry Smith   PetscFunctionBegin;
709*0bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
710*0bdbc534SSatish Balay   size = baij->ht_size;
711596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
712*0bdbc534SSatish Balay   if (baij->ht) {
713*0bdbc534SSatish Balay     PetscFunctionReturn(0);
714596b8d2eSBarry Smith   }
715*0bdbc534SSatish Balay 
716*0bdbc534SSatish Balay   baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht);
717a07cd24cSSatish Balay   HT = baij->ht;
718*0bdbc534SSatish Balay   HD = (Scalar**)(HT + size);
719*0bdbc534SSatish Balay   PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*));
720*0bdbc534SSatish Balay 
721596b8d2eSBarry Smith 
722596b8d2eSBarry Smith   /* Loop Over A */
723*0bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
724596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
725*0bdbc534SSatish Balay       row = i+rstart;
726*0bdbc534SSatish Balay       col = aj[j]+cstart;
727596b8d2eSBarry Smith 
728*0bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
729*0bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
730*0bdbc534SSatish Balay       h1  = HASH1(size,key);
731*0bdbc534SSatish Balay 
732*0bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
733*0bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
734*0bdbc534SSatish Balay           HT[(h1+k)%size] = key;
735*0bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
736596b8d2eSBarry Smith           break;
737596b8d2eSBarry Smith         } else ct++;
738596b8d2eSBarry Smith       }
739bd7f49f5SSatish Balay       if (k> max1) max1 = k;
740596b8d2eSBarry Smith     }
741596b8d2eSBarry Smith   }
742596b8d2eSBarry Smith   /* Loop Over B */
743*0bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
744596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
745*0bdbc534SSatish Balay       row = i+rstart;
746*0bdbc534SSatish Balay       col = garray[bj[j]];
747*0bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
748*0bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
749596b8d2eSBarry Smith       h1  = HASH1(size,key);
750*0bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
751*0bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
752*0bdbc534SSatish Balay           HT[(h1+k)%size] = key;
753*0bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
754596b8d2eSBarry Smith           break;
755596b8d2eSBarry Smith         } else ct++;
756596b8d2eSBarry Smith       }
757bd7f49f5SSatish Balay       if (k> max2) max2 = k;
758596b8d2eSBarry Smith     }
759596b8d2eSBarry Smith   }
760596b8d2eSBarry Smith 
761596b8d2eSBarry Smith   /* Print Summary */
762596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
763596b8d2eSBarry Smith     if (HT[i]) {j++;}
764596b8d2eSBarry Smith 
765*0bdbc534SSatish Balay   /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n",
766*0bdbc534SSatish Balay          PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */
767*0bdbc534SSatish Balay   fflush(stdout);
7683a40ed3dSBarry Smith   PetscFunctionReturn(0);
769596b8d2eSBarry Smith }
77057b952d6SSatish Balay 
7715615d1e5SSatish Balay #undef __FUNC__
7725615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
773ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
77457b952d6SSatish Balay {
77557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
77657b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
77757b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
778596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
77957b952d6SSatish Balay   Scalar      *values,val;
780e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
78157b952d6SSatish Balay 
782d64ed03dSBarry Smith   PetscFunctionBegin;
78357b952d6SSatish Balay   /*  wait on receives */
78457b952d6SSatish Balay   while (count) {
785ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
78657b952d6SSatish Balay     /* unpack receives into our local space */
78757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
788ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
78957b952d6SSatish Balay     n = n/3;
79057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
79157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
79257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
79357b952d6SSatish Balay       val = values[3*i+2];
79457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
79557b952d6SSatish Balay         col -= baij->cstart*bs;
7966fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
797d64ed03dSBarry Smith       } else {
79857b952d6SSatish Balay         if (mat->was_assembled) {
799905e6a2fSBarry Smith           if (!baij->colmap) {
800905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
801905e6a2fSBarry Smith           }
802a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
80357b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
80457b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
80557b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
80657b952d6SSatish Balay           }
80757b952d6SSatish Balay         }
8086fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
80957b952d6SSatish Balay       }
81057b952d6SSatish Balay     }
81157b952d6SSatish Balay     count--;
81257b952d6SSatish Balay   }
81357b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
81457b952d6SSatish Balay 
81557b952d6SSatish Balay   /* wait on sends */
81657b952d6SSatish Balay   if (baij->nsends) {
817d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
818ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
81957b952d6SSatish Balay     PetscFree(send_status);
82057b952d6SSatish Balay   }
82157b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
82257b952d6SSatish Balay 
82357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
82457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
82557b952d6SSatish Balay 
82657b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
82757b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
828ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
82957b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
83057b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
83157b952d6SSatish Balay   }
83257b952d6SSatish Balay 
8336d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
83457b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
83557b952d6SSatish Balay   }
83657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
83757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
83857b952d6SSatish Balay 
839596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
840*0bdbc534SSatish Balay   if (flg && !baij->ht) {
841a07cd24cSSatish Balay     double fact = 1.39;
842*0bdbc534SSatish Balay     CreateHashTable(mat,fact);
843*0bdbc534SSatish Balay     mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
844*0bdbc534SSatish Balay     mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
845bd7f49f5SSatish Balay   }
84657b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
8473a40ed3dSBarry Smith   PetscFunctionReturn(0);
84857b952d6SSatish Balay }
84957b952d6SSatish Balay 
8505615d1e5SSatish Balay #undef __FUNC__
8515615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
85257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
85357b952d6SSatish Balay {
85457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
85557b952d6SSatish Balay   int          ierr;
85657b952d6SSatish Balay 
857d64ed03dSBarry Smith   PetscFunctionBegin;
85857b952d6SSatish Balay   if (baij->size == 1) {
85957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
860a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
8613a40ed3dSBarry Smith   PetscFunctionReturn(0);
86257b952d6SSatish Balay }
86357b952d6SSatish Balay 
8645615d1e5SSatish Balay #undef __FUNC__
8655615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
86657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
86757b952d6SSatish Balay {
86857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
869cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
87057b952d6SSatish Balay   FILE         *fd;
87157b952d6SSatish Balay   ViewerType   vtype;
87257b952d6SSatish Balay 
873d64ed03dSBarry Smith   PetscFunctionBegin;
87457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
87557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
87657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
877639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
8784e220ebcSLois Curfman McInnes       MatInfo info;
87957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
88057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
8814e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
88257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
88357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
8844e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
8854e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
8864e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
8874e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
8884e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
8894e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
89057b952d6SSatish Balay       fflush(fd);
89157b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
89257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
8933a40ed3dSBarry Smith       PetscFunctionReturn(0);
894d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
895bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
8963a40ed3dSBarry Smith       PetscFunctionReturn(0);
89757b952d6SSatish Balay     }
89857b952d6SSatish Balay   }
89957b952d6SSatish Balay 
90057b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
90157b952d6SSatish Balay     Draw       draw;
90257b952d6SSatish Balay     PetscTruth isnull;
90357b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
9043a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
90557b952d6SSatish Balay   }
90657b952d6SSatish Balay 
90757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
90857b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
90957b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
91057b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
91157b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
91257b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
91357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
91457b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
91557b952d6SSatish Balay     fflush(fd);
91657b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
917d64ed03dSBarry Smith   } else {
91857b952d6SSatish Balay     int size = baij->size;
91957b952d6SSatish Balay     rank = baij->rank;
92057b952d6SSatish Balay     if (size == 1) {
92157b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
922d64ed03dSBarry Smith     } else {
92357b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
92457b952d6SSatish Balay       Mat         A;
92557b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
92657b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
92757b952d6SSatish Balay       int         mbs=baij->mbs;
92857b952d6SSatish Balay       Scalar      *a;
92957b952d6SSatish Balay 
93057b952d6SSatish Balay       if (!rank) {
93155843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
932d64ed03dSBarry Smith       } else {
93355843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
93457b952d6SSatish Balay       }
93557b952d6SSatish Balay       PLogObjectParent(mat,A);
93657b952d6SSatish Balay 
93757b952d6SSatish Balay       /* copy over the A part */
93857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
93957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
94057b952d6SSatish Balay       row = baij->rstart;
94157b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
94257b952d6SSatish Balay 
94357b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
94457b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
94557b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
94657b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
94757b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
94857b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
949cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
950cee3aa6bSSatish Balay             col++; a += bs;
95157b952d6SSatish Balay           }
95257b952d6SSatish Balay         }
95357b952d6SSatish Balay       }
95457b952d6SSatish Balay       /* copy over the B part */
95557b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
95657b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
95757b952d6SSatish Balay       row = baij->rstart*bs;
95857b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
95957b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
96057b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
96157b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
96257b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
96357b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
964cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
965cee3aa6bSSatish Balay             col++; a += bs;
96657b952d6SSatish Balay           }
96757b952d6SSatish Balay         }
96857b952d6SSatish Balay       }
96957b952d6SSatish Balay       PetscFree(rvals);
9706d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9716d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
97255843e3eSBarry Smith       /*
97355843e3eSBarry Smith          Everyone has to call to draw the matrix since the graphics waits are
97455843e3eSBarry Smith          synchronized across all processors that share the Draw object
97555843e3eSBarry Smith       */
97655843e3eSBarry Smith       if (!rank || vtype == DRAW_VIEWER) {
97757b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
97857b952d6SSatish Balay       }
97957b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
98057b952d6SSatish Balay     }
98157b952d6SSatish Balay   }
9823a40ed3dSBarry Smith   PetscFunctionReturn(0);
98357b952d6SSatish Balay }
98457b952d6SSatish Balay 
98557b952d6SSatish Balay 
98657b952d6SSatish Balay 
9875615d1e5SSatish Balay #undef __FUNC__
9885615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
989ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
99057b952d6SSatish Balay {
99157b952d6SSatish Balay   Mat         mat = (Mat) obj;
99257b952d6SSatish Balay   int         ierr;
99357b952d6SSatish Balay   ViewerType  vtype;
99457b952d6SSatish Balay 
995d64ed03dSBarry Smith   PetscFunctionBegin;
99657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
99757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
99857b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
99957b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
10003a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
10013a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
100257b952d6SSatish Balay   }
10033a40ed3dSBarry Smith   PetscFunctionReturn(0);
100457b952d6SSatish Balay }
100557b952d6SSatish Balay 
10065615d1e5SSatish Balay #undef __FUNC__
10075615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1008ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
100979bdfe76SSatish Balay {
101079bdfe76SSatish Balay   Mat         mat = (Mat) obj;
101179bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
101279bdfe76SSatish Balay   int         ierr;
101379bdfe76SSatish Balay 
1014d64ed03dSBarry Smith   PetscFunctionBegin;
10153a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
101679bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
101779bdfe76SSatish Balay #endif
101879bdfe76SSatish Balay 
101983e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
102079bdfe76SSatish Balay   PetscFree(baij->rowners);
102179bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
102279bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
102379bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
102479bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
102579bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
102679bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
102779bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
102830793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1029*0bdbc534SSatish Balay   if (baij->ht) PetscFree(baij->ht);
103079bdfe76SSatish Balay   PetscFree(baij);
103179bdfe76SSatish Balay   PLogObjectDestroy(mat);
103279bdfe76SSatish Balay   PetscHeaderDestroy(mat);
10333a40ed3dSBarry Smith   PetscFunctionReturn(0);
103479bdfe76SSatish Balay }
103579bdfe76SSatish Balay 
10365615d1e5SSatish Balay #undef __FUNC__
10375615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1038ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1039cee3aa6bSSatish Balay {
1040cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
104147b4a8eaSLois Curfman McInnes   int         ierr, nt;
1042cee3aa6bSSatish Balay 
1043d64ed03dSBarry Smith   PetscFunctionBegin;
1044c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
104547b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1046a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
104747b4a8eaSLois Curfman McInnes   }
1048c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
104947b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1050a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
105147b4a8eaSLois Curfman McInnes   }
105243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1053cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
105443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1055cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
105643a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
10573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1058cee3aa6bSSatish Balay }
1059cee3aa6bSSatish Balay 
10605615d1e5SSatish Balay #undef __FUNC__
10615615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1062ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1063cee3aa6bSSatish Balay {
1064cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1065cee3aa6bSSatish Balay   int        ierr;
1066d64ed03dSBarry Smith 
1067d64ed03dSBarry Smith   PetscFunctionBegin;
106843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1069cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
107043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1071cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
10723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1073cee3aa6bSSatish Balay }
1074cee3aa6bSSatish Balay 
10755615d1e5SSatish Balay #undef __FUNC__
10765615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1077ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1078cee3aa6bSSatish Balay {
1079cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1080cee3aa6bSSatish Balay   int         ierr;
1081cee3aa6bSSatish Balay 
1082d64ed03dSBarry Smith   PetscFunctionBegin;
1083cee3aa6bSSatish Balay   /* do nondiagonal part */
1084cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1085cee3aa6bSSatish Balay   /* send it on its way */
1086537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1087cee3aa6bSSatish Balay   /* do local part */
1088cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1089cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1090cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1091cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1092639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
10933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1094cee3aa6bSSatish Balay }
1095cee3aa6bSSatish Balay 
10965615d1e5SSatish Balay #undef __FUNC__
10975615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1098ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1099cee3aa6bSSatish Balay {
1100cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1101cee3aa6bSSatish Balay   int         ierr;
1102cee3aa6bSSatish Balay 
1103d64ed03dSBarry Smith   PetscFunctionBegin;
1104cee3aa6bSSatish Balay   /* do nondiagonal part */
1105cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1106cee3aa6bSSatish Balay   /* send it on its way */
1107537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1108cee3aa6bSSatish Balay   /* do local part */
1109cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1110cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1111cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1112cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1113537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
11143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1115cee3aa6bSSatish Balay }
1116cee3aa6bSSatish Balay 
1117cee3aa6bSSatish Balay /*
1118cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1119cee3aa6bSSatish Balay    diagonal block
1120cee3aa6bSSatish Balay */
11215615d1e5SSatish Balay #undef __FUNC__
11225615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1123ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1124cee3aa6bSSatish Balay {
1125cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
11263a40ed3dSBarry Smith   int         ierr;
1127d64ed03dSBarry Smith 
1128d64ed03dSBarry Smith   PetscFunctionBegin;
1129a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
11303a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
11313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1132cee3aa6bSSatish Balay }
1133cee3aa6bSSatish Balay 
11345615d1e5SSatish Balay #undef __FUNC__
11355615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1136ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1137cee3aa6bSSatish Balay {
1138cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1139cee3aa6bSSatish Balay   int         ierr;
1140d64ed03dSBarry Smith 
1141d64ed03dSBarry Smith   PetscFunctionBegin;
1142cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1143cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
11443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1145cee3aa6bSSatish Balay }
1146026e39d0SSatish Balay 
11475615d1e5SSatish Balay #undef __FUNC__
11485615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1149ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
115057b952d6SSatish Balay {
115157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1152d64ed03dSBarry Smith 
1153d64ed03dSBarry Smith   PetscFunctionBegin;
1154bd7f49f5SSatish Balay   if (m) *m = mat->M;
1155bd7f49f5SSatish Balay   if (n) *n = mat->N;
11563a40ed3dSBarry Smith   PetscFunctionReturn(0);
115757b952d6SSatish Balay }
115857b952d6SSatish Balay 
11595615d1e5SSatish Balay #undef __FUNC__
11605615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1161ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
116257b952d6SSatish Balay {
116357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1164d64ed03dSBarry Smith 
1165d64ed03dSBarry Smith   PetscFunctionBegin;
116657b952d6SSatish Balay   *m = mat->m; *n = mat->N;
11673a40ed3dSBarry Smith   PetscFunctionReturn(0);
116857b952d6SSatish Balay }
116957b952d6SSatish Balay 
11705615d1e5SSatish Balay #undef __FUNC__
11715615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1172ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
117357b952d6SSatish Balay {
117457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1175d64ed03dSBarry Smith 
1176d64ed03dSBarry Smith   PetscFunctionBegin;
117757b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
11783a40ed3dSBarry Smith   PetscFunctionReturn(0);
117957b952d6SSatish Balay }
118057b952d6SSatish Balay 
1181acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1182acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1183acdf5bf4SSatish Balay 
11845615d1e5SSatish Balay #undef __FUNC__
11855615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1186acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1187acdf5bf4SSatish Balay {
1188acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1189acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1190acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1191d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1192d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1193acdf5bf4SSatish Balay 
1194d64ed03dSBarry Smith   PetscFunctionBegin;
1195a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1196acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1197acdf5bf4SSatish Balay 
1198acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1199acdf5bf4SSatish Balay     /*
1200acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1201acdf5bf4SSatish Balay     */
1202acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1203bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1204bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1205acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1206acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1207acdf5bf4SSatish Balay     }
1208acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1209acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1210acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1211acdf5bf4SSatish Balay   }
1212acdf5bf4SSatish Balay 
1213a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1214d9d09a02SSatish Balay   lrow = row - brstart;
1215acdf5bf4SSatish Balay 
1216acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1217acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1218acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1219acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1220acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1221acdf5bf4SSatish Balay   nztot = nzA + nzB;
1222acdf5bf4SSatish Balay 
1223acdf5bf4SSatish Balay   cmap  = mat->garray;
1224acdf5bf4SSatish Balay   if (v  || idx) {
1225acdf5bf4SSatish Balay     if (nztot) {
1226acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1227acdf5bf4SSatish Balay       int imark = -1;
1228acdf5bf4SSatish Balay       if (v) {
1229acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1230acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1231d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1232acdf5bf4SSatish Balay           else break;
1233acdf5bf4SSatish Balay         }
1234acdf5bf4SSatish Balay         imark = i;
1235acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1236acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1237acdf5bf4SSatish Balay       }
1238acdf5bf4SSatish Balay       if (idx) {
1239acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1240acdf5bf4SSatish Balay         if (imark > -1) {
1241acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1242bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1243acdf5bf4SSatish Balay           }
1244acdf5bf4SSatish Balay         } else {
1245acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1246d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1247d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1248acdf5bf4SSatish Balay             else break;
1249acdf5bf4SSatish Balay           }
1250acdf5bf4SSatish Balay           imark = i;
1251acdf5bf4SSatish Balay         }
1252d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1253d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1254acdf5bf4SSatish Balay       }
1255d64ed03dSBarry Smith     } else {
1256d212a18eSSatish Balay       if (idx) *idx = 0;
1257d212a18eSSatish Balay       if (v)   *v   = 0;
1258d212a18eSSatish Balay     }
1259acdf5bf4SSatish Balay   }
1260acdf5bf4SSatish Balay   *nz = nztot;
1261acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1262acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
12633a40ed3dSBarry Smith   PetscFunctionReturn(0);
1264acdf5bf4SSatish Balay }
1265acdf5bf4SSatish Balay 
12665615d1e5SSatish Balay #undef __FUNC__
12675615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1268acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1269acdf5bf4SSatish Balay {
1270acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1271d64ed03dSBarry Smith 
1272d64ed03dSBarry Smith   PetscFunctionBegin;
1273acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1274a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1275acdf5bf4SSatish Balay   }
1276acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
12773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1278acdf5bf4SSatish Balay }
1279acdf5bf4SSatish Balay 
12805615d1e5SSatish Balay #undef __FUNC__
12815615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1282ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
12835a838052SSatish Balay {
12845a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1285d64ed03dSBarry Smith 
1286d64ed03dSBarry Smith   PetscFunctionBegin;
12875a838052SSatish Balay   *bs = baij->bs;
12883a40ed3dSBarry Smith   PetscFunctionReturn(0);
12895a838052SSatish Balay }
12905a838052SSatish Balay 
12915615d1e5SSatish Balay #undef __FUNC__
12925615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1293ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
129458667388SSatish Balay {
129558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
129658667388SSatish Balay   int         ierr;
1297d64ed03dSBarry Smith 
1298d64ed03dSBarry Smith   PetscFunctionBegin;
129958667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
130058667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
13013a40ed3dSBarry Smith   PetscFunctionReturn(0);
130258667388SSatish Balay }
13030ac07820SSatish Balay 
13045615d1e5SSatish Balay #undef __FUNC__
13055615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1306ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13070ac07820SSatish Balay {
13084e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13094e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13107d57db60SLois Curfman McInnes   int         ierr;
13117d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13120ac07820SSatish Balay 
1313d64ed03dSBarry Smith   PetscFunctionBegin;
13144e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13154e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
13164e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
13174e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
13184e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
13190ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13204e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13214e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13224e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
13234e220ebcSLois Curfman McInnes     info->memory       = isend[3];
13244e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
13250ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1326ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
13274e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13284e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13294e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13304e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13314e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13320ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1333ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
13344e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13354e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13364e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13374e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13384e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13390ac07820SSatish Balay   }
13405a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
13415a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
13425a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
13435a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
13444e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
13454e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
13464e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
13473a40ed3dSBarry Smith   PetscFunctionReturn(0);
13480ac07820SSatish Balay }
13490ac07820SSatish Balay 
13505615d1e5SSatish Balay #undef __FUNC__
13515615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1352ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
135358667388SSatish Balay {
135458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
135558667388SSatish Balay 
1356d64ed03dSBarry Smith   PetscFunctionBegin;
135758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
135858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
13596da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1360c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
136196854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1362c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1363b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1364b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1365b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1366aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
136758667388SSatish Balay         MatSetOption(a->A,op);
136858667388SSatish Balay         MatSetOption(a->B,op);
1369b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
13706da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
137158667388SSatish Balay              op == MAT_SYMMETRIC ||
137258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
137358667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
137458667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
137558667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
137658667388SSatish Balay     a->roworiented = 0;
137758667388SSatish Balay     MatSetOption(a->A,op);
137858667388SSatish Balay     MatSetOption(a->B,op);
13792b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
138090f02eecSBarry Smith     a->donotstash = 1;
1381d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1382d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1383d64ed03dSBarry Smith   } else {
1384d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1385d64ed03dSBarry Smith   }
13863a40ed3dSBarry Smith   PetscFunctionReturn(0);
138758667388SSatish Balay }
138858667388SSatish Balay 
13895615d1e5SSatish Balay #undef __FUNC__
13905615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1391ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
13920ac07820SSatish Balay {
13930ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
13940ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
13950ac07820SSatish Balay   Mat        B;
13960ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
13970ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
13980ac07820SSatish Balay   Scalar     *a;
13990ac07820SSatish Balay 
1400d64ed03dSBarry Smith   PetscFunctionBegin;
1401a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
14020ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14030ac07820SSatish Balay   CHKERRQ(ierr);
14040ac07820SSatish Balay 
14050ac07820SSatish Balay   /* copy over the A part */
14060ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14070ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14080ac07820SSatish Balay   row = baij->rstart;
14090ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
14100ac07820SSatish Balay 
14110ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14120ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14130ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14140ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14150ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14160ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14170ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14180ac07820SSatish Balay         col++; a += bs;
14190ac07820SSatish Balay       }
14200ac07820SSatish Balay     }
14210ac07820SSatish Balay   }
14220ac07820SSatish Balay   /* copy over the B part */
14230ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
14240ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14250ac07820SSatish Balay   row = baij->rstart*bs;
14260ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14270ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14280ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14290ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14300ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
14310ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14320ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14330ac07820SSatish Balay         col++; a += bs;
14340ac07820SSatish Balay       }
14350ac07820SSatish Balay     }
14360ac07820SSatish Balay   }
14370ac07820SSatish Balay   PetscFree(rvals);
14380ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14390ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14400ac07820SSatish Balay 
14410ac07820SSatish Balay   if (matout != PETSC_NULL) {
14420ac07820SSatish Balay     *matout = B;
14430ac07820SSatish Balay   } else {
14440ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
14450ac07820SSatish Balay     PetscFree(baij->rowners);
14460ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
14470ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
14480ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
14490ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
14500ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
14510ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
14520ac07820SSatish Balay     PetscFree(baij);
1453f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
14540ac07820SSatish Balay     PetscHeaderDestroy(B);
14550ac07820SSatish Balay   }
14563a40ed3dSBarry Smith   PetscFunctionReturn(0);
14570ac07820SSatish Balay }
14580e95ebc0SSatish Balay 
14595615d1e5SSatish Balay #undef __FUNC__
14605615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
14610e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
14620e95ebc0SSatish Balay {
14630e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
14640e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
14650e95ebc0SSatish Balay   int ierr,s1,s2,s3;
14660e95ebc0SSatish Balay 
1467d64ed03dSBarry Smith   PetscFunctionBegin;
14680e95ebc0SSatish Balay   if (ll)  {
14690e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
14700e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1471a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
14720e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
14730e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
14740e95ebc0SSatish Balay   }
1475a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
14763a40ed3dSBarry Smith   PetscFunctionReturn(0);
14770e95ebc0SSatish Balay }
14780e95ebc0SSatish Balay 
14795615d1e5SSatish Balay #undef __FUNC__
14805615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1481ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
14820ac07820SSatish Balay {
14830ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
14840ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1485a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
14860ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
14870ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1488a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
14890ac07820SSatish Balay   MPI_Comm       comm = A->comm;
14900ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
14910ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
14920ac07820SSatish Balay   IS             istmp;
14930ac07820SSatish Balay 
1494d64ed03dSBarry Smith   PetscFunctionBegin;
14950ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
14960ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
14970ac07820SSatish Balay 
14980ac07820SSatish Balay   /*  first count number of contributors to each processor */
14990ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
15000ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
15010ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
15020ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15030ac07820SSatish Balay     idx = rows[i];
15040ac07820SSatish Balay     found = 0;
15050ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
15060ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
15070ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
15080ac07820SSatish Balay       }
15090ac07820SSatish Balay     }
1510a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
15110ac07820SSatish Balay   }
15120ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
15130ac07820SSatish Balay 
15140ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
15150ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1516ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
15170ac07820SSatish Balay   nrecvs = work[rank];
1518ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
15190ac07820SSatish Balay   nmax   = work[rank];
15200ac07820SSatish Balay   PetscFree(work);
15210ac07820SSatish Balay 
15220ac07820SSatish Balay   /* post receives:   */
1523d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1524d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
15250ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1526ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
15270ac07820SSatish Balay   }
15280ac07820SSatish Balay 
15290ac07820SSatish Balay   /* do sends:
15300ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
15310ac07820SSatish Balay      the ith processor
15320ac07820SSatish Balay   */
15330ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1534ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
15350ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
15360ac07820SSatish Balay   starts[0] = 0;
15370ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15380ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15390ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
15400ac07820SSatish Balay   }
15410ac07820SSatish Balay   ISRestoreIndices(is,&rows);
15420ac07820SSatish Balay 
15430ac07820SSatish Balay   starts[0] = 0;
15440ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15450ac07820SSatish Balay   count = 0;
15460ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
15470ac07820SSatish Balay     if (procs[i]) {
1548ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
15490ac07820SSatish Balay     }
15500ac07820SSatish Balay   }
15510ac07820SSatish Balay   PetscFree(starts);
15520ac07820SSatish Balay 
15530ac07820SSatish Balay   base = owners[rank]*bs;
15540ac07820SSatish Balay 
15550ac07820SSatish Balay   /*  wait on receives */
15560ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
15570ac07820SSatish Balay   source = lens + nrecvs;
15580ac07820SSatish Balay   count  = nrecvs; slen = 0;
15590ac07820SSatish Balay   while (count) {
1560ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
15610ac07820SSatish Balay     /* unpack receives into our local space */
1562ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
15630ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
15640ac07820SSatish Balay     lens[imdex]  = n;
15650ac07820SSatish Balay     slen += n;
15660ac07820SSatish Balay     count--;
15670ac07820SSatish Balay   }
15680ac07820SSatish Balay   PetscFree(recv_waits);
15690ac07820SSatish Balay 
15700ac07820SSatish Balay   /* move the data into the send scatter */
15710ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
15720ac07820SSatish Balay   count = 0;
15730ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
15740ac07820SSatish Balay     values = rvalues + i*nmax;
15750ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
15760ac07820SSatish Balay       lrows[count++] = values[j] - base;
15770ac07820SSatish Balay     }
15780ac07820SSatish Balay   }
15790ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
15800ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
15810ac07820SSatish Balay 
15820ac07820SSatish Balay   /* actually zap the local rows */
1583029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
15840ac07820SSatish Balay   PLogObjectParent(A,istmp);
1585a07cd24cSSatish Balay 
1586a07cd24cSSatish Balay   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
15870ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
15880ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
15890ac07820SSatish Balay 
1590a07cd24cSSatish Balay   if (diag) {
1591a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1592a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1593a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1594a07cd24cSSatish Balay     }
1595a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1596a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1597a07cd24cSSatish Balay   }
1598a07cd24cSSatish Balay   PetscFree(lrows);
1599a07cd24cSSatish Balay 
16000ac07820SSatish Balay   /* wait on sends */
16010ac07820SSatish Balay   if (nsends) {
1602d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1603ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
16040ac07820SSatish Balay     PetscFree(send_status);
16050ac07820SSatish Balay   }
16060ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
16070ac07820SSatish Balay 
16083a40ed3dSBarry Smith   PetscFunctionReturn(0);
16090ac07820SSatish Balay }
1610ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
16115615d1e5SSatish Balay #undef __FUNC__
16125615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1613ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1614ba4ca20aSSatish Balay {
1615ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
16163a40ed3dSBarry Smith   int         ierr;
1617ba4ca20aSSatish Balay 
1618d64ed03dSBarry Smith   PetscFunctionBegin;
16193a40ed3dSBarry Smith   if (!a->rank) {
16203a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
16213a40ed3dSBarry Smith   }
16223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1623ba4ca20aSSatish Balay }
16240ac07820SSatish Balay 
16255615d1e5SSatish Balay #undef __FUNC__
16265615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1627ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1628bb5a7306SBarry Smith {
1629bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1630bb5a7306SBarry Smith   int         ierr;
1631d64ed03dSBarry Smith 
1632d64ed03dSBarry Smith   PetscFunctionBegin;
1633bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
16343a40ed3dSBarry Smith   PetscFunctionReturn(0);
1635bb5a7306SBarry Smith }
1636bb5a7306SBarry Smith 
16370ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
16380ac07820SSatish Balay 
163979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
164079bdfe76SSatish Balay static struct _MatOps MatOps = {
1641bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
16424c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
16434c50302cSBarry Smith   0,0,0,0,
16440ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
16450e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
164658667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
16474c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
16484c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
16494c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
165094a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1651d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1652ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1653bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1654ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
165579bdfe76SSatish Balay 
165679bdfe76SSatish Balay 
16575615d1e5SSatish Balay #undef __FUNC__
16585615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
165979bdfe76SSatish Balay /*@C
166079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
166179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
166279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
166379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
166479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
166579bdfe76SSatish Balay 
166679bdfe76SSatish Balay    Input Parameters:
166779bdfe76SSatish Balay .  comm - MPI communicator
166879bdfe76SSatish Balay .  bs   - size of blockk
166979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
167092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
167192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
167292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
167392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
167492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
167579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
167692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
167779bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
167879bdfe76SSatish Balay            submatrix  (same for all local rows)
167992e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
168092e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
168192e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
168292e8d321SLois Curfman McInnes            it is zero.
168392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
168479bdfe76SSatish Balay            submatrix (same for all local rows).
168592e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
168692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
168792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
168879bdfe76SSatish Balay 
168979bdfe76SSatish Balay    Output Parameter:
169079bdfe76SSatish Balay .  A - the matrix
169179bdfe76SSatish Balay 
169279bdfe76SSatish Balay    Notes:
169379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
169479bdfe76SSatish Balay    (possibly both).
169579bdfe76SSatish Balay 
169679bdfe76SSatish Balay    Storage Information:
169779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
169879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
169979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
170079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
170179bdfe76SSatish Balay 
170279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
170379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
170479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
170579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
170679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
170779bdfe76SSatish Balay 
170879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
170979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
171079bdfe76SSatish Balay 
171179bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
171279bdfe76SSatish Balay $         -------------------
171379bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
171479bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
171579bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
171679bdfe76SSatish Balay $         -------------------
171779bdfe76SSatish Balay $
171879bdfe76SSatish Balay 
171979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
172079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
172179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
172257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
172379bdfe76SSatish Balay 
1724d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1725d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
172679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
172792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
172892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
17296da5968aSLois Curfman McInnes    matrices.
173079bdfe76SSatish Balay 
173192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
173279bdfe76SSatish Balay 
173379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
173479bdfe76SSatish Balay @*/
173579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
173679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
173779bdfe76SSatish Balay {
173879bdfe76SSatish Balay   Mat          B;
173979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
17404c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
174179bdfe76SSatish Balay 
1742d64ed03dSBarry Smith   PetscFunctionBegin;
1743a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
17443914022bSBarry Smith 
17453914022bSBarry Smith   MPI_Comm_size(comm,&size);
17463914022bSBarry Smith   if (size == 1) {
17473914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
17483914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
17493914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
17503a40ed3dSBarry Smith     PetscFunctionReturn(0);
17513914022bSBarry Smith   }
17523914022bSBarry Smith 
175379bdfe76SSatish Balay   *A = 0;
1754d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
175579bdfe76SSatish Balay   PLogObjectCreate(B);
175679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
175779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
175879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17594c50302cSBarry Smith 
176079bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
176179bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
176290f02eecSBarry Smith   B->mapping    = 0;
176379bdfe76SSatish Balay   B->factor     = 0;
176479bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
176579bdfe76SSatish Balay 
1766e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
176779bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
176879bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
176979bdfe76SSatish Balay 
1770d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1771a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1772d64ed03dSBarry Smith   }
1773a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1774a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1775a8c6a408SBarry Smith   }
1776a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1777a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1778a8c6a408SBarry Smith   }
1779cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1780cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
178179bdfe76SSatish Balay 
178279bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
178379bdfe76SSatish Balay     work[0] = m; work[1] = n;
178479bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
1785ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
178679bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
178779bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
178879bdfe76SSatish Balay   }
178979bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
179079bdfe76SSatish Balay     Mbs = M/bs;
1791a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
179279bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
179379bdfe76SSatish Balay     m   = mbs*bs;
179479bdfe76SSatish Balay   }
179579bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
179679bdfe76SSatish Balay     Nbs = N/bs;
1797a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
179879bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
179979bdfe76SSatish Balay     n   = nbs*bs;
180079bdfe76SSatish Balay   }
1801a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
1802a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1803a8c6a408SBarry Smith   }
180479bdfe76SSatish Balay 
180579bdfe76SSatish Balay   b->m = m; B->m = m;
180679bdfe76SSatish Balay   b->n = n; B->n = n;
180779bdfe76SSatish Balay   b->N = N; B->N = N;
180879bdfe76SSatish Balay   b->M = M; B->M = M;
180979bdfe76SSatish Balay   b->bs  = bs;
181079bdfe76SSatish Balay   b->bs2 = bs*bs;
181179bdfe76SSatish Balay   b->mbs = mbs;
181279bdfe76SSatish Balay   b->nbs = nbs;
181379bdfe76SSatish Balay   b->Mbs = Mbs;
181479bdfe76SSatish Balay   b->Nbs = Nbs;
181579bdfe76SSatish Balay 
181679bdfe76SSatish Balay   /* build local table of row and column ownerships */
181779bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1818f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
18190ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
1820ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
182179bdfe76SSatish Balay   b->rowners[0] = 0;
182279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
182379bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
182479bdfe76SSatish Balay   }
182579bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
182679bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
18274fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
18284fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
18294fa0d573SSatish Balay 
1830ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
183179bdfe76SSatish Balay   b->cowners[0] = 0;
183279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
183379bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
183479bdfe76SSatish Balay   }
183579bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
183679bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
18374fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
18384fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
183979bdfe76SSatish Balay 
1840a07cd24cSSatish Balay 
184179bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1842029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
184379bdfe76SSatish Balay   PLogObjectParent(B,b->A);
184479bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1845029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
184679bdfe76SSatish Balay   PLogObjectParent(B,b->B);
184779bdfe76SSatish Balay 
184879bdfe76SSatish Balay   /* build cache for off array entries formed */
184979bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
185090f02eecSBarry Smith   b->donotstash  = 0;
185179bdfe76SSatish Balay   b->colmap      = 0;
185279bdfe76SSatish Balay   b->garray      = 0;
185379bdfe76SSatish Balay   b->roworiented = 1;
185479bdfe76SSatish Balay 
185530793edcSSatish Balay   /* stuff used in block assembly */
185630793edcSSatish Balay   b->barray       = 0;
185730793edcSSatish Balay 
185879bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
185979bdfe76SSatish Balay   b->lvec         = 0;
186079bdfe76SSatish Balay   b->Mvctx        = 0;
186179bdfe76SSatish Balay 
186279bdfe76SSatish Balay   /* stuff for MatGetRow() */
186379bdfe76SSatish Balay   b->rowindices   = 0;
186479bdfe76SSatish Balay   b->rowvalues    = 0;
186579bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
186679bdfe76SSatish Balay 
1867a07cd24cSSatish Balay   /* hash table stuff */
1868a07cd24cSSatish Balay   b->ht          = 0;
1869*0bdbc534SSatish Balay   b->ht_size     = 0;
1870a07cd24cSSatish Balay 
187179bdfe76SSatish Balay   *A = B;
18723a40ed3dSBarry Smith   PetscFunctionReturn(0);
187379bdfe76SSatish Balay }
1874026e39d0SSatish Balay 
18755615d1e5SSatish Balay #undef __FUNC__
18765615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
18770ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
18780ac07820SSatish Balay {
18790ac07820SSatish Balay   Mat         mat;
18800ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
18810ac07820SSatish Balay   int         ierr, len=0, flg;
18820ac07820SSatish Balay 
1883d64ed03dSBarry Smith   PetscFunctionBegin;
18840ac07820SSatish Balay   *newmat       = 0;
1885d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
18860ac07820SSatish Balay   PLogObjectCreate(mat);
18870ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
18880ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
18890ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
18900ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
18910ac07820SSatish Balay   mat->factor     = matin->factor;
18920ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
18930ac07820SSatish Balay 
18940ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
18950ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
18960ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
18970ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
18980ac07820SSatish Balay 
18990ac07820SSatish Balay   a->bs  = oldmat->bs;
19000ac07820SSatish Balay   a->bs2 = oldmat->bs2;
19010ac07820SSatish Balay   a->mbs = oldmat->mbs;
19020ac07820SSatish Balay   a->nbs = oldmat->nbs;
19030ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
19040ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
19050ac07820SSatish Balay 
19060ac07820SSatish Balay   a->rstart       = oldmat->rstart;
19070ac07820SSatish Balay   a->rend         = oldmat->rend;
19080ac07820SSatish Balay   a->cstart       = oldmat->cstart;
19090ac07820SSatish Balay   a->cend         = oldmat->cend;
19100ac07820SSatish Balay   a->size         = oldmat->size;
19110ac07820SSatish Balay   a->rank         = oldmat->rank;
1912e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
19130ac07820SSatish Balay   a->rowvalues    = 0;
19140ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
191530793edcSSatish Balay   a->barray       = 0;
19160ac07820SSatish Balay 
19170ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1918f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
19190ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
19200ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
19210ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
19220ac07820SSatish Balay   if (oldmat->colmap) {
19230ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
19240ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
19250ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
19260ac07820SSatish Balay   } else a->colmap = 0;
19270ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
19280ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
19290ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
19300ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
19310ac07820SSatish Balay   } else a->garray = 0;
19320ac07820SSatish Balay 
19330ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
19340ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
19350ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
19360ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
19370ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
19380ac07820SSatish Balay   PLogObjectParent(mat,a->A);
19390ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
19400ac07820SSatish Balay   PLogObjectParent(mat,a->B);
19410ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
19420ac07820SSatish Balay   if (flg) {
19430ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
19440ac07820SSatish Balay   }
19450ac07820SSatish Balay   *newmat = mat;
19463a40ed3dSBarry Smith   PetscFunctionReturn(0);
19470ac07820SSatish Balay }
194857b952d6SSatish Balay 
194957b952d6SSatish Balay #include "sys.h"
195057b952d6SSatish Balay 
19515615d1e5SSatish Balay #undef __FUNC__
19525615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
195357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
195457b952d6SSatish Balay {
195557b952d6SSatish Balay   Mat          A;
195657b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
195757b952d6SSatish Balay   Scalar       *vals,*buf;
195857b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
195957b952d6SSatish Balay   MPI_Status   status;
1960cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
196157b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
196257b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
196357b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
196457b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
196557b952d6SSatish Balay 
1966d64ed03dSBarry Smith   PetscFunctionBegin;
196757b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
196857b952d6SSatish Balay   bs2  = bs*bs;
196957b952d6SSatish Balay 
197057b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
197157b952d6SSatish Balay   if (!rank) {
197257b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1973e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1974a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1975d64ed03dSBarry Smith     if (header[3] < 0) {
1976a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
1977d64ed03dSBarry Smith     }
19786c5fab8fSBarry Smith   }
1979d64ed03dSBarry Smith 
1980ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
198157b952d6SSatish Balay   M = header[1]; N = header[2];
198257b952d6SSatish Balay 
1983a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
198457b952d6SSatish Balay 
198557b952d6SSatish Balay   /*
198657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
198757b952d6SSatish Balay      divisible by the blocksize
198857b952d6SSatish Balay   */
198957b952d6SSatish Balay   Mbs        = M/bs;
199057b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
199157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
199257b952d6SSatish Balay   else                  Mbs++;
199357b952d6SSatish Balay   if (extra_rows &&!rank) {
1994b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
199557b952d6SSatish Balay   }
1996537820f0SBarry Smith 
199757b952d6SSatish Balay   /* determine ownership of all rows */
199857b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
199957b952d6SSatish Balay   m   = mbs * bs;
2000cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2001cee3aa6bSSatish Balay   browners = rowners + size + 1;
2002ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
200357b952d6SSatish Balay   rowners[0] = 0;
2004cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2005cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
200657b952d6SSatish Balay   rstart = rowners[rank];
200757b952d6SSatish Balay   rend   = rowners[rank+1];
200857b952d6SSatish Balay 
200957b952d6SSatish Balay   /* distribute row lengths to all processors */
201057b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
201157b952d6SSatish Balay   if (!rank) {
201257b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2013e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
201457b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
201557b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2016cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2017ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
201857b952d6SSatish Balay     PetscFree(sndcounts);
2019d64ed03dSBarry Smith   } else {
2020ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
202157b952d6SSatish Balay   }
202257b952d6SSatish Balay 
202357b952d6SSatish Balay   if (!rank) {
202457b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
202557b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
202657b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
202757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
202857b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
202957b952d6SSatish Balay         procsnz[i] += rowlengths[j];
203057b952d6SSatish Balay       }
203157b952d6SSatish Balay     }
203257b952d6SSatish Balay     PetscFree(rowlengths);
203357b952d6SSatish Balay 
203457b952d6SSatish Balay     /* determine max buffer needed and allocate it */
203557b952d6SSatish Balay     maxnz = 0;
203657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
203757b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
203857b952d6SSatish Balay     }
203957b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
204057b952d6SSatish Balay 
204157b952d6SSatish Balay     /* read in my part of the matrix column indices  */
204257b952d6SSatish Balay     nz = procsnz[0];
204357b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
204457b952d6SSatish Balay     mycols = ibuf;
2045cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2046e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2047cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2048cee3aa6bSSatish Balay 
204957b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
205057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
205157b952d6SSatish Balay       nz   = procsnz[i];
2052e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2053ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
205457b952d6SSatish Balay     }
205557b952d6SSatish Balay     /* read in the stuff for the last proc */
205657b952d6SSatish Balay     if ( size != 1 ) {
205757b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2058e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
205957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2060ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
206157b952d6SSatish Balay     }
206257b952d6SSatish Balay     PetscFree(cols);
2063d64ed03dSBarry Smith   } else {
206457b952d6SSatish Balay     /* determine buffer space needed for message */
206557b952d6SSatish Balay     nz = 0;
206657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
206757b952d6SSatish Balay       nz += locrowlens[i];
206857b952d6SSatish Balay     }
206957b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
207057b952d6SSatish Balay     mycols = ibuf;
207157b952d6SSatish Balay     /* receive message of column indices*/
2072ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2073ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2074a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
207557b952d6SSatish Balay   }
207657b952d6SSatish Balay 
207757b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2078cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2079cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
208057b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2081cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
208257b952d6SSatish Balay   masked1 = mask    + Mbs;
208357b952d6SSatish Balay   masked2 = masked1 + Mbs;
208457b952d6SSatish Balay   rowcount = 0; nzcount = 0;
208557b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
208657b952d6SSatish Balay     dcount  = 0;
208757b952d6SSatish Balay     odcount = 0;
208857b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
208957b952d6SSatish Balay       kmax = locrowlens[rowcount];
209057b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
209157b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
209257b952d6SSatish Balay         if (!mask[tmp]) {
209357b952d6SSatish Balay           mask[tmp] = 1;
209457b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
209557b952d6SSatish Balay           else masked1[dcount++] = tmp;
209657b952d6SSatish Balay         }
209757b952d6SSatish Balay       }
209857b952d6SSatish Balay       rowcount++;
209957b952d6SSatish Balay     }
2100cee3aa6bSSatish Balay 
210157b952d6SSatish Balay     dlens[i]  = dcount;
210257b952d6SSatish Balay     odlens[i] = odcount;
2103cee3aa6bSSatish Balay 
210457b952d6SSatish Balay     /* zero out the mask elements we set */
210557b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
210657b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
210757b952d6SSatish Balay   }
2108cee3aa6bSSatish Balay 
210957b952d6SSatish Balay   /* create our matrix */
2110537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2111537820f0SBarry Smith          CHKERRQ(ierr);
211257b952d6SSatish Balay   A = *newmat;
21136d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
211457b952d6SSatish Balay 
211557b952d6SSatish Balay   if (!rank) {
211657b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
211757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
211857b952d6SSatish Balay     nz = procsnz[0];
211957b952d6SSatish Balay     vals = buf;
2120cee3aa6bSSatish Balay     mycols = ibuf;
2121cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2122e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2123cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2124537820f0SBarry Smith 
212557b952d6SSatish Balay     /* insert into matrix */
212657b952d6SSatish Balay     jj      = rstart*bs;
212757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
212857b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
212957b952d6SSatish Balay       mycols += locrowlens[i];
213057b952d6SSatish Balay       vals   += locrowlens[i];
213157b952d6SSatish Balay       jj++;
213257b952d6SSatish Balay     }
213357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
213457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
213557b952d6SSatish Balay       nz   = procsnz[i];
213657b952d6SSatish Balay       vals = buf;
2137e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2138ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
213957b952d6SSatish Balay     }
214057b952d6SSatish Balay     /* the last proc */
214157b952d6SSatish Balay     if ( size != 1 ){
214257b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2143cee3aa6bSSatish Balay       vals = buf;
2144e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
214557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2146ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
214757b952d6SSatish Balay     }
214857b952d6SSatish Balay     PetscFree(procsnz);
2149d64ed03dSBarry Smith   } else {
215057b952d6SSatish Balay     /* receive numeric values */
215157b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
215257b952d6SSatish Balay 
215357b952d6SSatish Balay     /* receive message of values*/
215457b952d6SSatish Balay     vals   = buf;
2155cee3aa6bSSatish Balay     mycols = ibuf;
2156ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2157ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2158a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
215957b952d6SSatish Balay 
216057b952d6SSatish Balay     /* insert into matrix */
216157b952d6SSatish Balay     jj      = rstart*bs;
2162cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
216357b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
216457b952d6SSatish Balay       mycols += locrowlens[i];
216557b952d6SSatish Balay       vals   += locrowlens[i];
216657b952d6SSatish Balay       jj++;
216757b952d6SSatish Balay     }
216857b952d6SSatish Balay   }
216957b952d6SSatish Balay   PetscFree(locrowlens);
217057b952d6SSatish Balay   PetscFree(buf);
217157b952d6SSatish Balay   PetscFree(ibuf);
217257b952d6SSatish Balay   PetscFree(rowners);
217357b952d6SSatish Balay   PetscFree(dlens);
2174cee3aa6bSSatish Balay   PetscFree(mask);
21756d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
21766d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
21773a40ed3dSBarry Smith   PetscFunctionReturn(0);
217857b952d6SSatish Balay }
217957b952d6SSatish Balay 
218057b952d6SSatish Balay 
2181