xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*3a40ed3dSBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.82 1997/09/25 21:43:23 bsmith Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
143b2fbd54SBarry Smith 
15537820f0SBarry Smith /*
16537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay    length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
215615d1e5SSatish Balay #undef __FUNC__
225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2457b952d6SSatish Balay {
2557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2857b952d6SSatish Balay 
29758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
32928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
33*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
3457b952d6SSatish Balay }
3557b952d6SSatish Balay 
3680c1aa95SSatish Balay #define CHUNKSIZE  10
3780c1aa95SSatish Balay 
38f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
3980c1aa95SSatish Balay { \
4080c1aa95SSatish Balay  \
4180c1aa95SSatish Balay     brow = row/bs;  \
4280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
43ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4480c1aa95SSatish Balay       bcol = col/bs; \
4580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
46ab26458aSBarry Smith       low = 0; high = nrow; \
47ab26458aSBarry Smith       while (high-low > 3) { \
48ab26458aSBarry Smith         t = (low+high)/2; \
49ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
50ab26458aSBarry Smith         else              low  = t; \
51ab26458aSBarry Smith       } \
52ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
56eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
57eada6651SSatish Balay           else                    *bap  = value;  \
58ac7a638eSSatish Balay           goto a_noinsert; \
5980c1aa95SSatish Balay         } \
6080c1aa95SSatish Balay       } \
6189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
6211ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
6380c1aa95SSatish Balay       if (nrow >= rmax) { \
6480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6580c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6680c1aa95SSatish Balay         Scalar *new_a; \
6780c1aa95SSatish Balay  \
6811ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
6989280ab3SLois Curfman McInnes  \
7080c1aa95SSatish Balay         /* malloc new storage space */ \
7180c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7280c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7380c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay         /* copy over old data into new slots */ \
7780c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7880c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
7980c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8180c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8280c1aa95SSatish Balay                                                            len*sizeof(int)); \
8380c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8480c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8680c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8780c1aa95SSatish Balay         /* free up old matrix storage */ \
8880c1aa95SSatish Balay         PetscFree(a->a);  \
8980c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9080c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9180c1aa95SSatish Balay         a->singlemalloc = 1; \
9280c1aa95SSatish Balay  \
9380c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
94ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9580c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9680c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9780c1aa95SSatish Balay         a->reallocs++; \
9880c1aa95SSatish Balay         a->nz++; \
9980c1aa95SSatish Balay       } \
10080c1aa95SSatish Balay       N = nrow++ - 1;  \
10180c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10280c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10380c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10480c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10580c1aa95SSatish Balay       } \
10680c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10780c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10880c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
109ac7a638eSSatish Balay       a_noinsert:; \
11080c1aa95SSatish Balay     ailen[brow] = nrow; \
11180c1aa95SSatish Balay }
11257b952d6SSatish Balay 
113ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
114ac7a638eSSatish Balay { \
115ac7a638eSSatish Balay  \
116ac7a638eSSatish Balay     brow = row/bs;  \
117ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
118ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
119ac7a638eSSatish Balay       bcol = col/bs; \
120ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
121ac7a638eSSatish Balay       low = 0; high = nrow; \
122ac7a638eSSatish Balay       while (high-low > 3) { \
123ac7a638eSSatish Balay         t = (low+high)/2; \
124ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
125ac7a638eSSatish Balay         else              low  = t; \
126ac7a638eSSatish Balay       } \
127ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
128ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
129ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
130ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
131ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
132ac7a638eSSatish Balay           else                    *bap  = value;  \
133ac7a638eSSatish Balay           goto b_noinsert; \
134ac7a638eSSatish Balay         } \
135ac7a638eSSatish Balay       } \
13689280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
13711ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
138ac7a638eSSatish Balay       if (nrow >= rmax) { \
139ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14074c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
141ac7a638eSSatish Balay         Scalar *new_a; \
142ac7a638eSSatish Balay  \
14311ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
14489280ab3SLois Curfman McInnes  \
145ac7a638eSSatish Balay         /* malloc new storage space */ \
14674c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
147ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
148ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
149ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
150ac7a638eSSatish Balay  \
151ac7a638eSSatish Balay         /* copy over old data into new slots */ \
152ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15374c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
154ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
155ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
156ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
157ac7a638eSSatish Balay                                                            len*sizeof(int)); \
158ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
159ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
161ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
162ac7a638eSSatish Balay         /* free up old matrix storage */ \
16374c639caSSatish Balay         PetscFree(b->a);  \
16474c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16574c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16674c639caSSatish Balay         b->singlemalloc = 1; \
167ac7a638eSSatish Balay  \
168ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
169ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17074c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17174c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17274c639caSSatish Balay         b->reallocs++; \
17374c639caSSatish Balay         b->nz++; \
174ac7a638eSSatish Balay       } \
175ac7a638eSSatish Balay       N = nrow++ - 1;  \
176ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
177ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
178ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
179ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
180ac7a638eSSatish Balay       } \
181ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
182ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
183ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
184ac7a638eSSatish Balay       b_noinsert:; \
185ac7a638eSSatish Balay     bilen[brow] = nrow; \
186ac7a638eSSatish Balay }
187ac7a638eSSatish Balay 
1885615d1e5SSatish Balay #undef __FUNC__
1895615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
190ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19157b952d6SSatish Balay {
19257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19357b952d6SSatish Balay   Scalar      value;
1944fa0d573SSatish Balay   int         ierr,i,j,row,col;
1954fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1964fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1974fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19857b952d6SSatish Balay 
199eada6651SSatish Balay   /* Some Variables required in the macro */
20080c1aa95SSatish Balay   Mat         A = baij->A;
20180c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
202ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203ac7a638eSSatish Balay   Scalar      *aa=a->a;
204ac7a638eSSatish Balay 
205ac7a638eSSatish Balay   Mat         B = baij->B;
206ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
207ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208ac7a638eSSatish Balay   Scalar      *ba=b->a;
209ac7a638eSSatish Balay 
210ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
211ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
212ac7a638eSSatish Balay   Scalar      *ap,*bap;
21380c1aa95SSatish Balay 
21457b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
215*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
216e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
217e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
218639f9d9dSBarry Smith #endif
21957b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22057b952d6SSatish Balay       row = im[i] - rstart_orig;
22157b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22257b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22357b952d6SSatish Balay           col = in[j] - cstart_orig;
22457b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
225f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22680c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22757b952d6SSatish Balay         }
228*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
229e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
230e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
231639f9d9dSBarry Smith #endif
23257b952d6SSatish Balay         else {
23357b952d6SSatish Balay           if (mat->was_assembled) {
234905e6a2fSBarry Smith             if (!baij->colmap) {
235905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236905e6a2fSBarry Smith             }
237905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
23857b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
23957b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24057b952d6SSatish Balay               col =  in[j];
2419bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2429bf004c3SSatish Balay               B = baij->B;
2439bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2449bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2459bf004c3SSatish Balay               ba=b->a;
24657b952d6SSatish Balay             }
24757b952d6SSatish Balay           }
24857b952d6SSatish Balay           else col = in[j];
24957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
251ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25257b952d6SSatish Balay         }
25357b952d6SSatish Balay       }
25457b952d6SSatish Balay     }
25557b952d6SSatish Balay     else {
25690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
25857b952d6SSatish Balay       }
25957b952d6SSatish Balay       else {
26090f02eecSBarry Smith         if (!baij->donotstash) {
26157b952d6SSatish Balay           row = im[i];
26257b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26357b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26457b952d6SSatish Balay           }
26557b952d6SSatish Balay         }
26657b952d6SSatish Balay       }
26757b952d6SSatish Balay     }
26890f02eecSBarry Smith   }
269*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
27057b952d6SSatish Balay }
27157b952d6SSatish Balay 
272ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
273ab26458aSBarry Smith #undef __FUNC__
274ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
275ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
276ab26458aSBarry Smith {
277ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27830793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
279abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
280ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
281ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
282ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
283ab26458aSBarry Smith 
28430793edcSSatish Balay 
28530793edcSSatish Balay   if(!barray) {
28647513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28730793edcSSatish Balay   }
28830793edcSSatish Balay 
289ab26458aSBarry Smith   if (roworiented) {
290ab26458aSBarry Smith     stepval = (n-1)*bs;
291ab26458aSBarry Smith   } else {
292ab26458aSBarry Smith     stepval = (m-1)*bs;
293ab26458aSBarry Smith   }
294ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
295*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
296ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
297ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
298ab26458aSBarry Smith #endif
299ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
300ab26458aSBarry Smith       row = im[i] - rstart;
301ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30215b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30315b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30415b57d14SSatish Balay           barray = v + i*bs2;
30515b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30615b57d14SSatish Balay           barray = v + j*bs2;
30715b57d14SSatish Balay         } else { /* Here a copy is required */
308ab26458aSBarry Smith           if (roworiented) {
309ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
310ab26458aSBarry Smith           } else {
311ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
312abef11f7SSatish Balay           }
31347513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31447513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31530793edcSSatish Balay               *barray++  = *value++;
31647513183SBarry Smith             }
31747513183SBarry Smith           }
31830793edcSSatish Balay           barray -=bs2;
31915b57d14SSatish Balay         }
320abef11f7SSatish Balay 
321abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
322abef11f7SSatish Balay           col  = in[j] - cstart;
32330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
324ab26458aSBarry Smith         }
325*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
326ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
32747513183SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");}
328ab26458aSBarry Smith #endif
329ab26458aSBarry Smith         else {
330ab26458aSBarry Smith           if (mat->was_assembled) {
331ab26458aSBarry Smith             if (!baij->colmap) {
332ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
333ab26458aSBarry Smith             }
334a5eb4965SSatish Balay 
335*3a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
336a5eb4965SSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");}
337a5eb4965SSatish Balay #endif
338a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
339ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
341ab26458aSBarry Smith               col =  in[j];
342ab26458aSBarry Smith             }
343ab26458aSBarry Smith           }
344ab26458aSBarry Smith           else col = in[j];
34530793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346ab26458aSBarry Smith         }
347ab26458aSBarry Smith       }
348ab26458aSBarry Smith     }
349ab26458aSBarry Smith     else {
350ab26458aSBarry Smith       if (!baij->donotstash) {
351ab26458aSBarry Smith         if (roworiented ) {
352abef11f7SSatish Balay           row   = im[i]*bs;
353abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
354abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
355abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
356abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
357abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
358abef11f7SSatish Balay               }
359ab26458aSBarry Smith             }
360ab26458aSBarry Smith           }
361ab26458aSBarry Smith         }
362ab26458aSBarry Smith         else {
363ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
364abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
365abef11f7SSatish Balay             col   = in[j]*bs;
366abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
367abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
368abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
369ab26458aSBarry Smith               }
370ab26458aSBarry Smith             }
371ab26458aSBarry Smith           }
372abef11f7SSatish Balay         }
373abef11f7SSatish Balay       }
374ab26458aSBarry Smith     }
375ab26458aSBarry Smith   }
376*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
377ab26458aSBarry Smith }
378ab26458aSBarry Smith 
3795615d1e5SSatish Balay #undef __FUNC__
3805615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
381ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
382d6de1c52SSatish Balay {
383d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
384d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
385d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
386d6de1c52SSatish Balay 
387d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
388e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
389e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
390d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
391d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
392d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
393e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
394e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
395d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
396d6de1c52SSatish Balay           col = idxn[j] - bscstart;
397d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
398d6de1c52SSatish Balay         }
399d6de1c52SSatish Balay         else {
400905e6a2fSBarry Smith           if (!baij->colmap) {
401905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
402905e6a2fSBarry Smith           }
403e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
404dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
405d9d09a02SSatish Balay           else {
406dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
407d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
408d6de1c52SSatish Balay           }
409d6de1c52SSatish Balay         }
410d6de1c52SSatish Balay       }
411d9d09a02SSatish Balay     }
412d6de1c52SSatish Balay     else {
413e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
414d6de1c52SSatish Balay     }
415d6de1c52SSatish Balay   }
416*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
417d6de1c52SSatish Balay }
418d6de1c52SSatish Balay 
4195615d1e5SSatish Balay #undef __FUNC__
4205615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
421ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
422d6de1c52SSatish Balay {
423d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
424d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
425acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
426d6de1c52SSatish Balay   double     sum = 0.0;
427d6de1c52SSatish Balay   Scalar     *v;
428d6de1c52SSatish Balay 
429d6de1c52SSatish Balay   if (baij->size == 1) {
430d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
431d6de1c52SSatish Balay   } else {
432d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
433d6de1c52SSatish Balay       v = amat->a;
434d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
435*3a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
436d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
437d6de1c52SSatish Balay #else
438d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
439d6de1c52SSatish Balay #endif
440d6de1c52SSatish Balay       }
441d6de1c52SSatish Balay       v = bmat->a;
442d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
443*3a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
444d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
445d6de1c52SSatish Balay #else
446d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
447d6de1c52SSatish Balay #endif
448d6de1c52SSatish Balay       }
449d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
450d6de1c52SSatish Balay       *norm = sqrt(*norm);
451d6de1c52SSatish Balay     }
452acdf5bf4SSatish Balay     else
453e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
454d6de1c52SSatish Balay   }
455*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
456d6de1c52SSatish Balay }
45757b952d6SSatish Balay 
4585615d1e5SSatish Balay #undef __FUNC__
4595615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
460ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
46157b952d6SSatish Balay {
46257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
46357b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
46457b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
46557b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
46657b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
46757b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46857b952d6SSatish Balay   InsertMode  addv;
46957b952d6SSatish Balay   Scalar      *rvalues,*svalues;
47057b952d6SSatish Balay 
47157b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
472e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
47357b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
474e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
47557b952d6SSatish Balay   }
476e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
47757b952d6SSatish Balay 
47857b952d6SSatish Balay   /*  first count number of contributors to each processor */
47957b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
48057b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
48157b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
48257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
48357b952d6SSatish Balay     idx = baij->stash.idx[i];
48457b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
48557b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
48657b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
48757b952d6SSatish Balay       }
48857b952d6SSatish Balay     }
48957b952d6SSatish Balay   }
49057b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
49157b952d6SSatish Balay 
49257b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
49357b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
49457b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
49557b952d6SSatish Balay   nreceives = work[rank];
49657b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
49757b952d6SSatish Balay   nmax = work[rank];
49857b952d6SSatish Balay   PetscFree(work);
49957b952d6SSatish Balay 
50057b952d6SSatish Balay   /* post receives:
50157b952d6SSatish Balay        1) each message will consist of ordered pairs
50257b952d6SSatish Balay      (global index,value) we store the global index as a double
50357b952d6SSatish Balay      to simplify the message passing.
50457b952d6SSatish Balay        2) since we don't know how long each individual message is we
50557b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
50657b952d6SSatish Balay      this is a lot of wasted space.
50757b952d6SSatish Balay 
50857b952d6SSatish Balay 
50957b952d6SSatish Balay        This could be done better.
51057b952d6SSatish Balay   */
51157b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
51257b952d6SSatish Balay   CHKPTRQ(rvalues);
51357b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
51457b952d6SSatish Balay   CHKPTRQ(recv_waits);
51557b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
51657b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
51757b952d6SSatish Balay               comm,recv_waits+i);
51857b952d6SSatish Balay   }
51957b952d6SSatish Balay 
52057b952d6SSatish Balay   /* do sends:
52157b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
52257b952d6SSatish Balay          the ith processor
52357b952d6SSatish Balay   */
52457b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
52557b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
52657b952d6SSatish Balay   CHKPTRQ(send_waits);
52757b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
52857b952d6SSatish Balay   starts[0] = 0;
52957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
53057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
53157b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
53257b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
53357b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
53457b952d6SSatish Balay   }
53557b952d6SSatish Balay   PetscFree(owner);
53657b952d6SSatish Balay   starts[0] = 0;
53757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
53857b952d6SSatish Balay   count = 0;
53957b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
54057b952d6SSatish Balay     if (procs[i]) {
54157b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
54257b952d6SSatish Balay                 comm,send_waits+count++);
54357b952d6SSatish Balay     }
54457b952d6SSatish Balay   }
54557b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
54657b952d6SSatish Balay 
54757b952d6SSatish Balay   /* Free cache space */
548d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
54957b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
55057b952d6SSatish Balay 
55157b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
55257b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
55357b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
55457b952d6SSatish Balay   baij->rmax       = nmax;
55557b952d6SSatish Balay 
556*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
55757b952d6SSatish Balay }
558596b8d2eSBarry Smith #include <math.h>
559596b8d2eSBarry Smith #define HASH_KEY 0.6180339887
560bd7f49f5SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1)))
561bd7f49f5SSatish Balay #define HASH2(size,key) ((int)((size)*fmod(((key+0.5)*HASH_KEY),1)))
56257b952d6SSatish Balay 
563bd7f49f5SSatish Balay 
564bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor)
565596b8d2eSBarry Smith {
566596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
567596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
568596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
569bd7f49f5SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,h2,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
570bd7f49f5SSatish Balay   int         size=(int)(factor*nz),ct=0,max1=0,max2=0;
57152c87ff2SSatish Balay   /* Scalar      *aa=a->a,*ba=b->a; */
572596b8d2eSBarry Smith   double      key;
573596b8d2eSBarry Smith   static double *HT;
574596b8d2eSBarry Smith   static      int flag=1;
575bd7f49f5SSatish Balay   extern int PetscGlobalRank;
576bd7f49f5SSatish Balay   flag = 1;
577596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
578596b8d2eSBarry Smith   if (flag) {
579596b8d2eSBarry Smith     HT = (double*)PetscMalloc(size*sizeof(double));
580596b8d2eSBarry Smith     flag = 0;
581596b8d2eSBarry Smith   }
582596b8d2eSBarry Smith   PetscMemzero(HT,size*sizeof(double));
583596b8d2eSBarry Smith 
584596b8d2eSBarry Smith   /* Loop Over A */
585596b8d2eSBarry Smith   for ( i=0; i<a->n; i++ ) {
586596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
587596b8d2eSBarry Smith       key = i*baij->n+aj[j]+1;
588596b8d2eSBarry Smith       h1  = HASH1(size,key);
589bd7f49f5SSatish Balay       h2  = HASH2(size,key);
590596b8d2eSBarry Smith 
591596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
592596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
593596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
594596b8d2eSBarry Smith           break;
595596b8d2eSBarry Smith         } else ct++;
596596b8d2eSBarry Smith       }
597bd7f49f5SSatish Balay       if (k> max1) max1 =k;
598596b8d2eSBarry Smith     }
599596b8d2eSBarry Smith   }
600596b8d2eSBarry Smith   /* Loop Over B */
601596b8d2eSBarry Smith   for ( i=0; i<b->n; i++ ) {
602596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
603596b8d2eSBarry Smith       key = i*b->n+bj[j]+1;
604596b8d2eSBarry Smith       h1  = HASH1(size,key);
605596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
606596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
607596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
608596b8d2eSBarry Smith           break;
609596b8d2eSBarry Smith         } else ct++;
610596b8d2eSBarry Smith       }
611bd7f49f5SSatish Balay       if (k> max2) max2 =k;
612596b8d2eSBarry Smith     }
613596b8d2eSBarry Smith   }
614596b8d2eSBarry Smith 
615596b8d2eSBarry Smith   /* Print Summary */
616596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
617596b8d2eSBarry Smith     if (HT[i]) {j++;}
618596b8d2eSBarry Smith 
619bd7f49f5SSatish Balay   printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n",
620bd7f49f5SSatish Balay          PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j)/j,j);
621bd7f49f5SSatish Balay   PetscFree(HT);
622*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
623596b8d2eSBarry Smith }
62457b952d6SSatish Balay 
6255615d1e5SSatish Balay #undef __FUNC__
6265615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
627ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
62857b952d6SSatish Balay {
62957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
63057b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
63157b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
632596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
63357b952d6SSatish Balay   Scalar      *values,val;
634e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
63557b952d6SSatish Balay 
63657b952d6SSatish Balay   /*  wait on receives */
63757b952d6SSatish Balay   while (count) {
63857b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
63957b952d6SSatish Balay     /* unpack receives into our local space */
64057b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
64157b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
64257b952d6SSatish Balay     n = n/3;
64357b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
64457b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
64557b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
64657b952d6SSatish Balay       val = values[3*i+2];
64757b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
64857b952d6SSatish Balay         col -= baij->cstart*bs;
6496fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
65057b952d6SSatish Balay       }
65157b952d6SSatish Balay       else {
65257b952d6SSatish Balay         if (mat->was_assembled) {
653905e6a2fSBarry Smith           if (!baij->colmap) {
654905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
655905e6a2fSBarry Smith           }
656a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
65757b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
65857b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65957b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
66057b952d6SSatish Balay           }
66157b952d6SSatish Balay         }
6626fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
66357b952d6SSatish Balay       }
66457b952d6SSatish Balay     }
66557b952d6SSatish Balay     count--;
66657b952d6SSatish Balay   }
66757b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
66857b952d6SSatish Balay 
66957b952d6SSatish Balay   /* wait on sends */
67057b952d6SSatish Balay   if (baij->nsends) {
67157b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
67257b952d6SSatish Balay     CHKPTRQ(send_status);
67357b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
67457b952d6SSatish Balay     PetscFree(send_status);
67557b952d6SSatish Balay   }
67657b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
67757b952d6SSatish Balay 
67857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
67957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
68057b952d6SSatish Balay 
68157b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
68257b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
68357b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
68457b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
68557b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
68657b952d6SSatish Balay   }
68757b952d6SSatish Balay 
6886d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
68957b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
69057b952d6SSatish Balay   }
69157b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
69257b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
69357b952d6SSatish Balay 
694596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
695bd7f49f5SSatish Balay   if (flg) {
696bd7f49f5SSatish Balay     double fact;
697bd7f49f5SSatish Balay     for ( fact=1.2; fact<2.0; fact +=0.05) CreateHashTable(mat,fact);
698bd7f49f5SSatish Balay   }
69957b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
700*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
70157b952d6SSatish Balay }
70257b952d6SSatish Balay 
7035615d1e5SSatish Balay #undef __FUNC__
7045615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
70557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
70657b952d6SSatish Balay {
70757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
70857b952d6SSatish Balay   int          ierr;
70957b952d6SSatish Balay 
71057b952d6SSatish Balay   if (baij->size == 1) {
71157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
71257b952d6SSatish Balay   }
713e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
714*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
71557b952d6SSatish Balay }
71657b952d6SSatish Balay 
7175615d1e5SSatish Balay #undef __FUNC__
7185615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
71957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
72057b952d6SSatish Balay {
72157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
722cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
72357b952d6SSatish Balay   FILE         *fd;
72457b952d6SSatish Balay   ViewerType   vtype;
72557b952d6SSatish Balay 
72657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
72757b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
72857b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
729639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7304e220ebcSLois Curfman McInnes       MatInfo info;
73157b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
73257b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7334e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
73457b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
73557b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7364e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7374e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7384e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7394e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7404e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7414e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
74257b952d6SSatish Balay       fflush(fd);
74357b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
74457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
745*3a40ed3dSBarry Smith       PetscFunctionReturn(0);
74657b952d6SSatish Balay     }
747639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
748bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
749*3a40ed3dSBarry Smith       PetscFunctionReturn(0);
75057b952d6SSatish Balay     }
75157b952d6SSatish Balay   }
75257b952d6SSatish Balay 
75357b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
75457b952d6SSatish Balay     Draw       draw;
75557b952d6SSatish Balay     PetscTruth isnull;
75657b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
757*3a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
75857b952d6SSatish Balay   }
75957b952d6SSatish Balay 
76057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
76157b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
76257b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
76357b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
76457b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
76557b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
76657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
76757b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
76857b952d6SSatish Balay     fflush(fd);
76957b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
77057b952d6SSatish Balay   }
77157b952d6SSatish Balay   else {
77257b952d6SSatish Balay     int size = baij->size;
77357b952d6SSatish Balay     rank = baij->rank;
77457b952d6SSatish Balay     if (size == 1) {
77557b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
77657b952d6SSatish Balay     }
77757b952d6SSatish Balay     else {
77857b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
77957b952d6SSatish Balay       Mat         A;
78057b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
78157b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
78257b952d6SSatish Balay       int         mbs=baij->mbs;
78357b952d6SSatish Balay       Scalar      *a;
78457b952d6SSatish Balay 
78557b952d6SSatish Balay       if (!rank) {
786cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
78757b952d6SSatish Balay         CHKERRQ(ierr);
78857b952d6SSatish Balay       }
78957b952d6SSatish Balay       else {
790cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
79157b952d6SSatish Balay         CHKERRQ(ierr);
79257b952d6SSatish Balay       }
79357b952d6SSatish Balay       PLogObjectParent(mat,A);
79457b952d6SSatish Balay 
79557b952d6SSatish Balay       /* copy over the A part */
79657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
79757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
79857b952d6SSatish Balay       row = baij->rstart;
79957b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
80057b952d6SSatish Balay 
80157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
80257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
80357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
80457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
80557b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
80657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
807cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
808cee3aa6bSSatish Balay             col++; a += bs;
80957b952d6SSatish Balay           }
81057b952d6SSatish Balay         }
81157b952d6SSatish Balay       }
81257b952d6SSatish Balay       /* copy over the B part */
81357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
81457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
81557b952d6SSatish Balay       row = baij->rstart*bs;
81657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
81757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
81857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
81957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
82057b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
82157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
822cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
823cee3aa6bSSatish Balay             col++; a += bs;
82457b952d6SSatish Balay           }
82557b952d6SSatish Balay         }
82657b952d6SSatish Balay       }
82757b952d6SSatish Balay       PetscFree(rvals);
8286d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8296d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
83057b952d6SSatish Balay       if (!rank) {
83157b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
83257b952d6SSatish Balay       }
83357b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
83457b952d6SSatish Balay     }
83557b952d6SSatish Balay   }
836*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
83757b952d6SSatish Balay }
83857b952d6SSatish Balay 
83957b952d6SSatish Balay 
84057b952d6SSatish Balay 
8415615d1e5SSatish Balay #undef __FUNC__
8425615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
843ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
84457b952d6SSatish Balay {
84557b952d6SSatish Balay   Mat         mat = (Mat) obj;
84657b952d6SSatish Balay   int         ierr;
84757b952d6SSatish Balay   ViewerType  vtype;
84857b952d6SSatish Balay 
84957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
85057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
85157b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
85257b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
853*3a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
854*3a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
85557b952d6SSatish Balay   }
856*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
85757b952d6SSatish Balay }
85857b952d6SSatish Balay 
8595615d1e5SSatish Balay #undef __FUNC__
8605615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
861ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
86279bdfe76SSatish Balay {
86379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
86479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
86579bdfe76SSatish Balay   int         ierr;
86679bdfe76SSatish Balay 
867*3a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
86879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
86979bdfe76SSatish Balay #endif
87079bdfe76SSatish Balay 
87183e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
87279bdfe76SSatish Balay   PetscFree(baij->rowners);
87379bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
87479bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
87579bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
87679bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
87779bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
87879bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
87979bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
88030793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
88179bdfe76SSatish Balay   PetscFree(baij);
88279bdfe76SSatish Balay   PLogObjectDestroy(mat);
88379bdfe76SSatish Balay   PetscHeaderDestroy(mat);
884*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
88579bdfe76SSatish Balay }
88679bdfe76SSatish Balay 
8875615d1e5SSatish Balay #undef __FUNC__
8885615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
889ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
890cee3aa6bSSatish Balay {
891cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
89247b4a8eaSLois Curfman McInnes   int         ierr, nt;
893cee3aa6bSSatish Balay 
894c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
89547b4a8eaSLois Curfman McInnes   if (nt != a->n) {
896ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
89747b4a8eaSLois Curfman McInnes   }
898c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
89947b4a8eaSLois Curfman McInnes   if (nt != a->m) {
900e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
90147b4a8eaSLois Curfman McInnes   }
90243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
903cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
90443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
905cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
90643a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
907*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
908cee3aa6bSSatish Balay }
909cee3aa6bSSatish Balay 
9105615d1e5SSatish Balay #undef __FUNC__
9115615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
912ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
913cee3aa6bSSatish Balay {
914cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
915cee3aa6bSSatish Balay   int        ierr;
91643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
917cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
91843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
919cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
920*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
921cee3aa6bSSatish Balay }
922cee3aa6bSSatish Balay 
9235615d1e5SSatish Balay #undef __FUNC__
9245615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
925ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
926cee3aa6bSSatish Balay {
927cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
928cee3aa6bSSatish Balay   int        ierr;
929cee3aa6bSSatish Balay 
930cee3aa6bSSatish Balay   /* do nondiagonal part */
931cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
932cee3aa6bSSatish Balay   /* send it on its way */
933537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
934cee3aa6bSSatish Balay   /* do local part */
935cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
936cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
937cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
938cee3aa6bSSatish Balay   /* but is not perhaps always true. */
939639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
940*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
941cee3aa6bSSatish Balay }
942cee3aa6bSSatish Balay 
9435615d1e5SSatish Balay #undef __FUNC__
9445615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
945ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
946cee3aa6bSSatish Balay {
947cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
948cee3aa6bSSatish Balay   int        ierr;
949cee3aa6bSSatish Balay 
950cee3aa6bSSatish Balay   /* do nondiagonal part */
951cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
952cee3aa6bSSatish Balay   /* send it on its way */
953537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
954cee3aa6bSSatish Balay   /* do local part */
955cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
956cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
957cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
958cee3aa6bSSatish Balay   /* but is not perhaps always true. */
959537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
960*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
961cee3aa6bSSatish Balay }
962cee3aa6bSSatish Balay 
963cee3aa6bSSatish Balay /*
964cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
965cee3aa6bSSatish Balay    diagonal block
966cee3aa6bSSatish Balay */
9675615d1e5SSatish Balay #undef __FUNC__
9685615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
969ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
970cee3aa6bSSatish Balay {
971cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
972*3a40ed3dSBarry Smith   int         ierr;
973*3a40ed3dSBarry Smith   if (a->M != a->N) SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
974*3a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
975*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
976cee3aa6bSSatish Balay }
977cee3aa6bSSatish Balay 
9785615d1e5SSatish Balay #undef __FUNC__
9795615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
980ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
981cee3aa6bSSatish Balay {
982cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
983cee3aa6bSSatish Balay   int        ierr;
984cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
985cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
986*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
987cee3aa6bSSatish Balay }
988026e39d0SSatish Balay 
9895615d1e5SSatish Balay #undef __FUNC__
9905615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
991ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
99257b952d6SSatish Balay {
99357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
994bd7f49f5SSatish Balay   if (m) *m = mat->M;
995bd7f49f5SSatish Balay   if (n) *n = mat->N;
996*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
99757b952d6SSatish Balay }
99857b952d6SSatish Balay 
9995615d1e5SSatish Balay #undef __FUNC__
10005615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1001ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
100257b952d6SSatish Balay {
100357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
100457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
1005*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
100657b952d6SSatish Balay }
100757b952d6SSatish Balay 
10085615d1e5SSatish Balay #undef __FUNC__
10095615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1010ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
101157b952d6SSatish Balay {
101257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
101357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1014*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
101557b952d6SSatish Balay }
101657b952d6SSatish Balay 
1017acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1018acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1019acdf5bf4SSatish Balay 
10205615d1e5SSatish Balay #undef __FUNC__
10215615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1022acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1023acdf5bf4SSatish Balay {
1024acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1025acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1026acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1027d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1028d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1029acdf5bf4SSatish Balay 
1030e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1031acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1032acdf5bf4SSatish Balay 
1033acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1034acdf5bf4SSatish Balay     /*
1035acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1036acdf5bf4SSatish Balay     */
1037acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1038bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1039bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1040acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1041acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1042acdf5bf4SSatish Balay     }
1043acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1044acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1045acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1046acdf5bf4SSatish Balay   }
1047acdf5bf4SSatish Balay 
1048acdf5bf4SSatish Balay 
1049e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1050d9d09a02SSatish Balay   lrow = row - brstart;
1051acdf5bf4SSatish Balay 
1052acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1053acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1054acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1055acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1056acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1057acdf5bf4SSatish Balay   nztot = nzA + nzB;
1058acdf5bf4SSatish Balay 
1059acdf5bf4SSatish Balay   cmap  = mat->garray;
1060acdf5bf4SSatish Balay   if (v  || idx) {
1061acdf5bf4SSatish Balay     if (nztot) {
1062acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1063acdf5bf4SSatish Balay       int imark = -1;
1064acdf5bf4SSatish Balay       if (v) {
1065acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1066acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1067d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1068acdf5bf4SSatish Balay           else break;
1069acdf5bf4SSatish Balay         }
1070acdf5bf4SSatish Balay         imark = i;
1071acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1072acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1073acdf5bf4SSatish Balay       }
1074acdf5bf4SSatish Balay       if (idx) {
1075acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1076acdf5bf4SSatish Balay         if (imark > -1) {
1077acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1078bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1079acdf5bf4SSatish Balay           }
1080acdf5bf4SSatish Balay         } else {
1081acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1082d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1083d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1084acdf5bf4SSatish Balay             else break;
1085acdf5bf4SSatish Balay           }
1086acdf5bf4SSatish Balay           imark = i;
1087acdf5bf4SSatish Balay         }
1088d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1089d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1090acdf5bf4SSatish Balay       }
1091acdf5bf4SSatish Balay     }
1092d212a18eSSatish Balay     else {
1093d212a18eSSatish Balay       if (idx) *idx = 0;
1094d212a18eSSatish Balay       if (v)   *v   = 0;
1095d212a18eSSatish Balay     }
1096acdf5bf4SSatish Balay   }
1097acdf5bf4SSatish Balay   *nz = nztot;
1098acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1099acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1100*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1101acdf5bf4SSatish Balay }
1102acdf5bf4SSatish Balay 
11035615d1e5SSatish Balay #undef __FUNC__
11045615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1105acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1106acdf5bf4SSatish Balay {
1107acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1108acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1109e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1110acdf5bf4SSatish Balay   }
1111acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1112*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1113acdf5bf4SSatish Balay }
1114acdf5bf4SSatish Balay 
11155615d1e5SSatish Balay #undef __FUNC__
11165615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1117ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
11185a838052SSatish Balay {
11195a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
11205a838052SSatish Balay   *bs = baij->bs;
1121*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
11225a838052SSatish Balay }
11235a838052SSatish Balay 
11245615d1e5SSatish Balay #undef __FUNC__
11255615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1126ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
112758667388SSatish Balay {
112858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
112958667388SSatish Balay   int         ierr;
113058667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
113158667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1132*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
113358667388SSatish Balay }
11340ac07820SSatish Balay 
11355615d1e5SSatish Balay #undef __FUNC__
11365615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1137ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11380ac07820SSatish Balay {
11394e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11404e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11417d57db60SLois Curfman McInnes   int         ierr;
11427d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11430ac07820SSatish Balay 
11444e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11454e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11464e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11474e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11484e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11494e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11504e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11514e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11524e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11530ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11544e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11554e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11564e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11574e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11584e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11590ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1160dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11614e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11624e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11634e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11644e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11654e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11660ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1167dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11684e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11694e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11704e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11714e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11724e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11730ac07820SSatish Balay   }
11744e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11754e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11764e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
1177*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
11780ac07820SSatish Balay }
11790ac07820SSatish Balay 
11805615d1e5SSatish Balay #undef __FUNC__
11815615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1182ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
118358667388SSatish Balay {
118458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
118558667388SSatish Balay 
118658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
118758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11886da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1189c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
119096854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1191c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1192b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1193b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1194b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1195aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
119658667388SSatish Balay         MatSetOption(a->A,op);
119758667388SSatish Balay         MatSetOption(a->B,op);
1198b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11996da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
120058667388SSatish Balay              op == MAT_SYMMETRIC ||
120158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
120258667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
120358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
120458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
120558667388SSatish Balay     a->roworiented = 0;
120658667388SSatish Balay     MatSetOption(a->A,op);
120758667388SSatish Balay     MatSetOption(a->B,op);
12082b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
120990f02eecSBarry Smith     a->donotstash = 1;
121090f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1211e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
121258667388SSatish Balay   else
1213e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
1214*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
121558667388SSatish Balay }
121658667388SSatish Balay 
12175615d1e5SSatish Balay #undef __FUNC__
12185615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1219ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
12200ac07820SSatish Balay {
12210ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
12220ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
12230ac07820SSatish Balay   Mat        B;
12240ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
12250ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
12260ac07820SSatish Balay   Scalar     *a;
12270ac07820SSatish Balay 
12280ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1229e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
12300ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
12310ac07820SSatish Balay   CHKERRQ(ierr);
12320ac07820SSatish Balay 
12330ac07820SSatish Balay   /* copy over the A part */
12340ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12350ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12360ac07820SSatish Balay   row = baij->rstart;
12370ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12380ac07820SSatish Balay 
12390ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12400ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12410ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12420ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12430ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12440ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12450ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12460ac07820SSatish Balay         col++; a += bs;
12470ac07820SSatish Balay       }
12480ac07820SSatish Balay     }
12490ac07820SSatish Balay   }
12500ac07820SSatish Balay   /* copy over the B part */
12510ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12520ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12530ac07820SSatish Balay   row = baij->rstart*bs;
12540ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12550ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12560ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12570ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12580ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12590ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12600ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12610ac07820SSatish Balay         col++; a += bs;
12620ac07820SSatish Balay       }
12630ac07820SSatish Balay     }
12640ac07820SSatish Balay   }
12650ac07820SSatish Balay   PetscFree(rvals);
12660ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12670ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12680ac07820SSatish Balay 
12690ac07820SSatish Balay   if (matout != PETSC_NULL) {
12700ac07820SSatish Balay     *matout = B;
12710ac07820SSatish Balay   } else {
12720ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12730ac07820SSatish Balay     PetscFree(baij->rowners);
12740ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12750ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12760ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12770ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12780ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12790ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12800ac07820SSatish Balay     PetscFree(baij);
1281f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12820ac07820SSatish Balay     PetscHeaderDestroy(B);
12830ac07820SSatish Balay   }
1284*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
12850ac07820SSatish Balay }
12860e95ebc0SSatish Balay 
12875615d1e5SSatish Balay #undef __FUNC__
12885615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12890e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12900e95ebc0SSatish Balay {
12910e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12920e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12930e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12940e95ebc0SSatish Balay 
12950e95ebc0SSatish Balay   if (ll)  {
12960e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12970e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1298e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12990e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
13000e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
13010e95ebc0SSatish Balay   }
1302e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
1303*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
13040e95ebc0SSatish Balay }
13050e95ebc0SSatish Balay 
13060ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
13070ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
13080ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
13090ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
13100ac07820SSatish Balay    routine.
13110ac07820SSatish Balay */
13125615d1e5SSatish Balay #undef __FUNC__
13135615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1314ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
13150ac07820SSatish Balay {
13160ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
13170ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
13180ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
13190ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
13200ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
13210ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
13220ac07820SSatish Balay   MPI_Comm       comm = A->comm;
13230ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
13240ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
13250ac07820SSatish Balay   IS             istmp;
13260ac07820SSatish Balay 
13270ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
13280ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
13290ac07820SSatish Balay 
13300ac07820SSatish Balay   /*  first count number of contributors to each processor */
13310ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
13320ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
13330ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
13340ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13350ac07820SSatish Balay     idx = rows[i];
13360ac07820SSatish Balay     found = 0;
13370ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13380ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13390ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13400ac07820SSatish Balay       }
13410ac07820SSatish Balay     }
1342e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13430ac07820SSatish Balay   }
13440ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13450ac07820SSatish Balay 
13460ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13470ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13480ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13490ac07820SSatish Balay   nrecvs = work[rank];
13500ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13510ac07820SSatish Balay   nmax = work[rank];
13520ac07820SSatish Balay   PetscFree(work);
13530ac07820SSatish Balay 
13540ac07820SSatish Balay   /* post receives:   */
13550ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13560ac07820SSatish Balay   CHKPTRQ(rvalues);
13570ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13580ac07820SSatish Balay   CHKPTRQ(recv_waits);
13590ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13600ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13610ac07820SSatish Balay   }
13620ac07820SSatish Balay 
13630ac07820SSatish Balay   /* do sends:
13640ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13650ac07820SSatish Balay          the ith processor
13660ac07820SSatish Balay   */
13670ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13680ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13690ac07820SSatish Balay   CHKPTRQ(send_waits);
13700ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13710ac07820SSatish Balay   starts[0] = 0;
13720ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13730ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13740ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13750ac07820SSatish Balay   }
13760ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13770ac07820SSatish Balay 
13780ac07820SSatish Balay   starts[0] = 0;
13790ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13800ac07820SSatish Balay   count = 0;
13810ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13820ac07820SSatish Balay     if (procs[i]) {
13830ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13840ac07820SSatish Balay     }
13850ac07820SSatish Balay   }
13860ac07820SSatish Balay   PetscFree(starts);
13870ac07820SSatish Balay 
13880ac07820SSatish Balay   base = owners[rank]*bs;
13890ac07820SSatish Balay 
13900ac07820SSatish Balay   /*  wait on receives */
13910ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13920ac07820SSatish Balay   source = lens + nrecvs;
13930ac07820SSatish Balay   count  = nrecvs; slen = 0;
13940ac07820SSatish Balay   while (count) {
13950ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13960ac07820SSatish Balay     /* unpack receives into our local space */
13970ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13980ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13990ac07820SSatish Balay     lens[imdex]  = n;
14000ac07820SSatish Balay     slen += n;
14010ac07820SSatish Balay     count--;
14020ac07820SSatish Balay   }
14030ac07820SSatish Balay   PetscFree(recv_waits);
14040ac07820SSatish Balay 
14050ac07820SSatish Balay   /* move the data into the send scatter */
14060ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
14070ac07820SSatish Balay   count = 0;
14080ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
14090ac07820SSatish Balay     values = rvalues + i*nmax;
14100ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
14110ac07820SSatish Balay       lrows[count++] = values[j] - base;
14120ac07820SSatish Balay     }
14130ac07820SSatish Balay   }
14140ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
14150ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
14160ac07820SSatish Balay 
14170ac07820SSatish Balay   /* actually zap the local rows */
1418029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
14190ac07820SSatish Balay   PLogObjectParent(A,istmp);
14200ac07820SSatish Balay   PetscFree(lrows);
14210ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
14220ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
14230ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
14240ac07820SSatish Balay 
14250ac07820SSatish Balay   /* wait on sends */
14260ac07820SSatish Balay   if (nsends) {
14270ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
14280ac07820SSatish Balay     CHKPTRQ(send_status);
14290ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
14300ac07820SSatish Balay     PetscFree(send_status);
14310ac07820SSatish Balay   }
14320ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
14330ac07820SSatish Balay 
1434*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
14350ac07820SSatish Balay }
1436ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14375615d1e5SSatish Balay #undef __FUNC__
14385615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1439ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1440ba4ca20aSSatish Balay {
1441ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1442*3a40ed3dSBarry Smith   int         ierr;
1443ba4ca20aSSatish Balay 
1444*3a40ed3dSBarry Smith   if (!a->rank) {
1445*3a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1446*3a40ed3dSBarry Smith   }
1447*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1448ba4ca20aSSatish Balay }
14490ac07820SSatish Balay 
14505615d1e5SSatish Balay #undef __FUNC__
14515615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1452ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1453bb5a7306SBarry Smith {
1454bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1455bb5a7306SBarry Smith   int         ierr;
1456bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1457*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1458bb5a7306SBarry Smith }
1459bb5a7306SBarry Smith 
14600ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14610ac07820SSatish Balay 
146279bdfe76SSatish Balay /* -------------------------------------------------------------------*/
146379bdfe76SSatish Balay static struct _MatOps MatOps = {
1464bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14654c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14664c50302cSBarry Smith   0,0,0,0,
14670ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14680e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
146958667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14704c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14714c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14724c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
147394a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1474d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1475ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1476bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1477ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
147879bdfe76SSatish Balay 
147979bdfe76SSatish Balay 
14805615d1e5SSatish Balay #undef __FUNC__
14815615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
148279bdfe76SSatish Balay /*@C
148379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
148479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
148579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
148679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
148779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
148879bdfe76SSatish Balay 
148979bdfe76SSatish Balay    Input Parameters:
149079bdfe76SSatish Balay .  comm - MPI communicator
149179bdfe76SSatish Balay .  bs   - size of blockk
149279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
149392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
149492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
149592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
149692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
149792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
149879bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
149992e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
150079bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
150179bdfe76SSatish Balay            submatrix  (same for all local rows)
150292e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
150392e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
150492e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
150592e8d321SLois Curfman McInnes            it is zero.
150692e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
150779bdfe76SSatish Balay            submatrix (same for all local rows).
150892e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
150992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
151092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
151179bdfe76SSatish Balay 
151279bdfe76SSatish Balay    Output Parameter:
151379bdfe76SSatish Balay .  A - the matrix
151479bdfe76SSatish Balay 
151579bdfe76SSatish Balay    Notes:
151679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
151779bdfe76SSatish Balay    (possibly both).
151879bdfe76SSatish Balay 
151979bdfe76SSatish Balay    Storage Information:
152079bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
152179bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
152279bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
152379bdfe76SSatish Balay    local matrix (a rectangular submatrix).
152479bdfe76SSatish Balay 
152579bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
152679bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
152779bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
152879bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
152979bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
153079bdfe76SSatish Balay 
153179bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
153279bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
153379bdfe76SSatish Balay 
153479bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
153579bdfe76SSatish Balay $         -------------------
153679bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
153779bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
153879bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
153979bdfe76SSatish Balay $         -------------------
154079bdfe76SSatish Balay $
154179bdfe76SSatish Balay 
154279bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
154379bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
154479bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
154557b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
154679bdfe76SSatish Balay 
154779bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
154879bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
154979bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
155092e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
155192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15526da5968aSLois Curfman McInnes    matrices.
155379bdfe76SSatish Balay 
155492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
155579bdfe76SSatish Balay 
155679bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
155779bdfe76SSatish Balay @*/
155879bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
155979bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
156079bdfe76SSatish Balay {
156179bdfe76SSatish Balay   Mat          B;
156279bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15634c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
156479bdfe76SSatish Balay 
15653914022bSBarry Smith   if (bs < 1) SETERRQ(1,0,"Invalid block size specified, must be positive");
15663914022bSBarry Smith 
15673914022bSBarry Smith   MPI_Comm_size(comm,&size);
15683914022bSBarry Smith   if (size == 1) {
15693914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
15703914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
15713914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1572*3a40ed3dSBarry Smith     PetscFunctionReturn(0);
15733914022bSBarry Smith   }
15743914022bSBarry Smith 
157579bdfe76SSatish Balay   *A = 0;
1576d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
157779bdfe76SSatish Balay   PLogObjectCreate(B);
157879bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
157979bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
158079bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15814c50302cSBarry Smith 
158279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
158379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
158490f02eecSBarry Smith   B->mapping    = 0;
158579bdfe76SSatish Balay   B->factor     = 0;
158679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
158779bdfe76SSatish Balay 
1588e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
158979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
159079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
159179bdfe76SSatish Balay 
159279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1593e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1594e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1595e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1596cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1597cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
159879bdfe76SSatish Balay 
159979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
160079bdfe76SSatish Balay     work[0] = m; work[1] = n;
160179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
160279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
160379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
160479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
160579bdfe76SSatish Balay   }
160679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
160779bdfe76SSatish Balay     Mbs = M/bs;
1608e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
160979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
161079bdfe76SSatish Balay     m   = mbs*bs;
161179bdfe76SSatish Balay   }
161279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
161379bdfe76SSatish Balay     Nbs = N/bs;
1614e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
161579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
161679bdfe76SSatish Balay     n   = nbs*bs;
161779bdfe76SSatish Balay   }
1618e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
161979bdfe76SSatish Balay 
162079bdfe76SSatish Balay   b->m = m; B->m = m;
162179bdfe76SSatish Balay   b->n = n; B->n = n;
162279bdfe76SSatish Balay   b->N = N; B->N = N;
162379bdfe76SSatish Balay   b->M = M; B->M = M;
162479bdfe76SSatish Balay   b->bs  = bs;
162579bdfe76SSatish Balay   b->bs2 = bs*bs;
162679bdfe76SSatish Balay   b->mbs = mbs;
162779bdfe76SSatish Balay   b->nbs = nbs;
162879bdfe76SSatish Balay   b->Mbs = Mbs;
162979bdfe76SSatish Balay   b->Nbs = Nbs;
163079bdfe76SSatish Balay 
163179bdfe76SSatish Balay   /* build local table of row and column ownerships */
163279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1633f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16340ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
163579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
163679bdfe76SSatish Balay   b->rowners[0] = 0;
163779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
163879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
163979bdfe76SSatish Balay   }
164079bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
164179bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16424fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16434fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16444fa0d573SSatish Balay 
164579bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
164679bdfe76SSatish Balay   b->cowners[0] = 0;
164779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
164879bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
164979bdfe76SSatish Balay   }
165079bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
165179bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16524fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16534fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
165479bdfe76SSatish Balay 
165579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1656029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
165779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
165879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1659029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
166079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
166179bdfe76SSatish Balay 
166279bdfe76SSatish Balay   /* build cache for off array entries formed */
166379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
166490f02eecSBarry Smith   b->donotstash  = 0;
166579bdfe76SSatish Balay   b->colmap      = 0;
166679bdfe76SSatish Balay   b->garray      = 0;
166779bdfe76SSatish Balay   b->roworiented = 1;
166879bdfe76SSatish Balay 
166930793edcSSatish Balay   /* stuff used in block assembly */
167030793edcSSatish Balay   b->barray      = 0;
167130793edcSSatish Balay 
167279bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
167379bdfe76SSatish Balay   b->lvec        = 0;
167479bdfe76SSatish Balay   b->Mvctx       = 0;
167579bdfe76SSatish Balay 
167679bdfe76SSatish Balay   /* stuff for MatGetRow() */
167779bdfe76SSatish Balay   b->rowindices   = 0;
167879bdfe76SSatish Balay   b->rowvalues    = 0;
167979bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
168079bdfe76SSatish Balay 
168179bdfe76SSatish Balay   *A = B;
1682*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
168379bdfe76SSatish Balay }
1684026e39d0SSatish Balay 
16855615d1e5SSatish Balay #undef __FUNC__
16865615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16870ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16880ac07820SSatish Balay {
16890ac07820SSatish Balay   Mat         mat;
16900ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16910ac07820SSatish Balay   int         ierr, len=0, flg;
16920ac07820SSatish Balay 
16930ac07820SSatish Balay   *newmat       = 0;
1694d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
16950ac07820SSatish Balay   PLogObjectCreate(mat);
16960ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16970ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16980ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16990ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
17000ac07820SSatish Balay   mat->factor     = matin->factor;
17010ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
17020ac07820SSatish Balay 
17030ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
17040ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
17050ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
17060ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
17070ac07820SSatish Balay 
17080ac07820SSatish Balay   a->bs  = oldmat->bs;
17090ac07820SSatish Balay   a->bs2 = oldmat->bs2;
17100ac07820SSatish Balay   a->mbs = oldmat->mbs;
17110ac07820SSatish Balay   a->nbs = oldmat->nbs;
17120ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
17130ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
17140ac07820SSatish Balay 
17150ac07820SSatish Balay   a->rstart       = oldmat->rstart;
17160ac07820SSatish Balay   a->rend         = oldmat->rend;
17170ac07820SSatish Balay   a->cstart       = oldmat->cstart;
17180ac07820SSatish Balay   a->cend         = oldmat->cend;
17190ac07820SSatish Balay   a->size         = oldmat->size;
17200ac07820SSatish Balay   a->rank         = oldmat->rank;
1721e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
17220ac07820SSatish Balay   a->rowvalues    = 0;
17230ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
172430793edcSSatish Balay   a->barray       = 0;
17250ac07820SSatish Balay 
17260ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1727f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
17280ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
17290ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
17300ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17310ac07820SSatish Balay   if (oldmat->colmap) {
17320ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
17330ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17340ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17350ac07820SSatish Balay   } else a->colmap = 0;
17360ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17370ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17380ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17390ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17400ac07820SSatish Balay   } else a->garray = 0;
17410ac07820SSatish Balay 
17420ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17430ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17440ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17450ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17460ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17470ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17480ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17490ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17500ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17510ac07820SSatish Balay   if (flg) {
17520ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17530ac07820SSatish Balay   }
17540ac07820SSatish Balay   *newmat = mat;
1755*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
17560ac07820SSatish Balay }
175757b952d6SSatish Balay 
175857b952d6SSatish Balay #include "sys.h"
175957b952d6SSatish Balay 
17605615d1e5SSatish Balay #undef __FUNC__
17615615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
176257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
176357b952d6SSatish Balay {
176457b952d6SSatish Balay   Mat          A;
176557b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
176657b952d6SSatish Balay   Scalar       *vals,*buf;
176757b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
176857b952d6SSatish Balay   MPI_Status   status;
1769cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
177057b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
177157b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
177257b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
177357b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
177457b952d6SSatish Balay 
177557b952d6SSatish Balay 
177657b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
177757b952d6SSatish Balay   bs2  = bs*bs;
177857b952d6SSatish Balay 
177957b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
178057b952d6SSatish Balay   if (!rank) {
178157b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1782e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1783e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
178457b952d6SSatish Balay   }
178557b952d6SSatish Balay 
178657b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
178757b952d6SSatish Balay   M = header[1]; N = header[2];
178857b952d6SSatish Balay 
1789e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
179057b952d6SSatish Balay 
179157b952d6SSatish Balay   /*
179257b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
179357b952d6SSatish Balay      divisible by the blocksize
179457b952d6SSatish Balay   */
179557b952d6SSatish Balay   Mbs        = M/bs;
179657b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
179757b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
179857b952d6SSatish Balay   else                  Mbs++;
179957b952d6SSatish Balay   if (extra_rows &&!rank) {
1800b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
180157b952d6SSatish Balay   }
1802537820f0SBarry Smith 
180357b952d6SSatish Balay   /* determine ownership of all rows */
180457b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
180557b952d6SSatish Balay   m   = mbs * bs;
1806cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1807cee3aa6bSSatish Balay   browners = rowners + size + 1;
180857b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
180957b952d6SSatish Balay   rowners[0] = 0;
1810cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1811cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
181257b952d6SSatish Balay   rstart = rowners[rank];
181357b952d6SSatish Balay   rend   = rowners[rank+1];
181457b952d6SSatish Balay 
181557b952d6SSatish Balay   /* distribute row lengths to all processors */
181657b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
181757b952d6SSatish Balay   if (!rank) {
181857b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
1819e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
182057b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
182157b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1822cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1823cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
182457b952d6SSatish Balay     PetscFree(sndcounts);
182557b952d6SSatish Balay   }
182657b952d6SSatish Balay   else {
182757b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
182857b952d6SSatish Balay   }
182957b952d6SSatish Balay 
183057b952d6SSatish Balay   if (!rank) {
183157b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
183257b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
183357b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
183457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
183557b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
183657b952d6SSatish Balay         procsnz[i] += rowlengths[j];
183757b952d6SSatish Balay       }
183857b952d6SSatish Balay     }
183957b952d6SSatish Balay     PetscFree(rowlengths);
184057b952d6SSatish Balay 
184157b952d6SSatish Balay     /* determine max buffer needed and allocate it */
184257b952d6SSatish Balay     maxnz = 0;
184357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
184457b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
184557b952d6SSatish Balay     }
184657b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
184757b952d6SSatish Balay 
184857b952d6SSatish Balay     /* read in my part of the matrix column indices  */
184957b952d6SSatish Balay     nz = procsnz[0];
185057b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
185157b952d6SSatish Balay     mycols = ibuf;
1852cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
1853e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1854cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1855cee3aa6bSSatish Balay 
185657b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
185757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
185857b952d6SSatish Balay       nz = procsnz[i];
1859e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
186057b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
186157b952d6SSatish Balay     }
186257b952d6SSatish Balay     /* read in the stuff for the last proc */
186357b952d6SSatish Balay     if ( size != 1 ) {
186457b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1865e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
186657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1867cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
186857b952d6SSatish Balay     }
186957b952d6SSatish Balay     PetscFree(cols);
187057b952d6SSatish Balay   }
187157b952d6SSatish Balay   else {
187257b952d6SSatish Balay     /* determine buffer space needed for message */
187357b952d6SSatish Balay     nz = 0;
187457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
187557b952d6SSatish Balay       nz += locrowlens[i];
187657b952d6SSatish Balay     }
187757b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
187857b952d6SSatish Balay     mycols = ibuf;
187957b952d6SSatish Balay     /* receive message of column indices*/
188057b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
188157b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1882e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
188357b952d6SSatish Balay   }
188457b952d6SSatish Balay 
188557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1886cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1887cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
188857b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1889cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
189057b952d6SSatish Balay   masked1 = mask    + Mbs;
189157b952d6SSatish Balay   masked2 = masked1 + Mbs;
189257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
189357b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
189457b952d6SSatish Balay     dcount  = 0;
189557b952d6SSatish Balay     odcount = 0;
189657b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
189757b952d6SSatish Balay       kmax = locrowlens[rowcount];
189857b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
189957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
190057b952d6SSatish Balay         if (!mask[tmp]) {
190157b952d6SSatish Balay           mask[tmp] = 1;
190257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
190357b952d6SSatish Balay           else masked1[dcount++] = tmp;
190457b952d6SSatish Balay         }
190557b952d6SSatish Balay       }
190657b952d6SSatish Balay       rowcount++;
190757b952d6SSatish Balay     }
1908cee3aa6bSSatish Balay 
190957b952d6SSatish Balay     dlens[i]  = dcount;
191057b952d6SSatish Balay     odlens[i] = odcount;
1911cee3aa6bSSatish Balay 
191257b952d6SSatish Balay     /* zero out the mask elements we set */
191357b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
191457b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
191557b952d6SSatish Balay   }
1916cee3aa6bSSatish Balay 
191757b952d6SSatish Balay   /* create our matrix */
1918537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1919537820f0SBarry Smith          CHKERRQ(ierr);
192057b952d6SSatish Balay   A = *newmat;
19216d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
192257b952d6SSatish Balay 
192357b952d6SSatish Balay   if (!rank) {
192457b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
192557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
192657b952d6SSatish Balay     nz = procsnz[0];
192757b952d6SSatish Balay     vals = buf;
1928cee3aa6bSSatish Balay     mycols = ibuf;
1929cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
1930e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1931cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1932537820f0SBarry Smith 
193357b952d6SSatish Balay     /* insert into matrix */
193457b952d6SSatish Balay     jj      = rstart*bs;
193557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
193657b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
193757b952d6SSatish Balay       mycols += locrowlens[i];
193857b952d6SSatish Balay       vals   += locrowlens[i];
193957b952d6SSatish Balay       jj++;
194057b952d6SSatish Balay     }
194157b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
194257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
194357b952d6SSatish Balay       nz = procsnz[i];
194457b952d6SSatish Balay       vals = buf;
1945e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
194657b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
194757b952d6SSatish Balay     }
194857b952d6SSatish Balay     /* the last proc */
194957b952d6SSatish Balay     if ( size != 1 ){
195057b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1951cee3aa6bSSatish Balay       vals = buf;
1952e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
195357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1954cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
195557b952d6SSatish Balay     }
195657b952d6SSatish Balay     PetscFree(procsnz);
195757b952d6SSatish Balay   }
195857b952d6SSatish Balay   else {
195957b952d6SSatish Balay     /* receive numeric values */
196057b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
196157b952d6SSatish Balay 
196257b952d6SSatish Balay     /* receive message of values*/
196357b952d6SSatish Balay     vals = buf;
1964cee3aa6bSSatish Balay     mycols = ibuf;
196557b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
196657b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1967e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
196857b952d6SSatish Balay 
196957b952d6SSatish Balay     /* insert into matrix */
197057b952d6SSatish Balay     jj      = rstart*bs;
1971cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
197257b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
197357b952d6SSatish Balay       mycols += locrowlens[i];
197457b952d6SSatish Balay       vals   += locrowlens[i];
197557b952d6SSatish Balay       jj++;
197657b952d6SSatish Balay     }
197757b952d6SSatish Balay   }
197857b952d6SSatish Balay   PetscFree(locrowlens);
197957b952d6SSatish Balay   PetscFree(buf);
198057b952d6SSatish Balay   PetscFree(ibuf);
198157b952d6SSatish Balay   PetscFree(rowners);
198257b952d6SSatish Balay   PetscFree(dlens);
1983cee3aa6bSSatish Balay   PetscFree(mask);
19846d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19856d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1986*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
198757b952d6SSatish Balay }
198857b952d6SSatish Balay 
198957b952d6SSatish Balay 
1990