xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a07cd24c6a245fc42573a0cf9f579b10781b6fbf)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*a07cd24cSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.91 1997/12/01 01:55:07 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
143b2fbd54SBarry Smith 
15537820f0SBarry Smith /*
16537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay    length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
215615d1e5SSatish Balay #undef __FUNC__
225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2457b952d6SSatish Balay {
2557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2857b952d6SSatish Balay 
29d64ed03dSBarry Smith   PetscFunctionBegin;
30758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3257b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
343a40ed3dSBarry Smith   PetscFunctionReturn(0);
3557b952d6SSatish Balay }
3657b952d6SSatish Balay 
3780c1aa95SSatish Balay #define CHUNKSIZE  10
3880c1aa95SSatish Balay 
39f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4080c1aa95SSatish Balay { \
4180c1aa95SSatish Balay  \
4280c1aa95SSatish Balay     brow = row/bs;  \
4380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4580c1aa95SSatish Balay       bcol = col/bs; \
4680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
47ab26458aSBarry Smith       low = 0; high = nrow; \
48ab26458aSBarry Smith       while (high-low > 3) { \
49ab26458aSBarry Smith         t = (low+high)/2; \
50ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
51ab26458aSBarry Smith         else              low  = t; \
52ab26458aSBarry Smith       } \
53ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
58eada6651SSatish Balay           else                    *bap  = value;  \
59ac7a638eSSatish Balay           goto a_noinsert; \
6080c1aa95SSatish Balay         } \
6180c1aa95SSatish Balay       } \
6289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
63a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
6480c1aa95SSatish Balay       if (nrow >= rmax) { \
6580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6680c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6780c1aa95SSatish Balay         Scalar *new_a; \
6880c1aa95SSatish Balay  \
69a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7089280ab3SLois Curfman McInnes  \
7180c1aa95SSatish Balay         /* malloc new storage space */ \
7280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7680c1aa95SSatish Balay  \
7780c1aa95SSatish Balay         /* copy over old data into new slots */ \
7880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8380c1aa95SSatish Balay                                                            len*sizeof(int)); \
8480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8880c1aa95SSatish Balay         /* free up old matrix storage */ \
8980c1aa95SSatish Balay         PetscFree(a->a);  \
9080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9280c1aa95SSatish Balay         a->singlemalloc = 1; \
9380c1aa95SSatish Balay  \
9480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9880c1aa95SSatish Balay         a->reallocs++; \
9980c1aa95SSatish Balay         a->nz++; \
10080c1aa95SSatish Balay       } \
10180c1aa95SSatish Balay       N = nrow++ - 1;  \
10280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
110ac7a638eSSatish Balay       a_noinsert:; \
11180c1aa95SSatish Balay     ailen[brow] = nrow; \
11280c1aa95SSatish Balay }
11357b952d6SSatish Balay 
114ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115ac7a638eSSatish Balay { \
116ac7a638eSSatish Balay  \
117ac7a638eSSatish Balay     brow = row/bs;  \
118ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
120ac7a638eSSatish Balay       bcol = col/bs; \
121ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
122ac7a638eSSatish Balay       low = 0; high = nrow; \
123ac7a638eSSatish Balay       while (high-low > 3) { \
124ac7a638eSSatish Balay         t = (low+high)/2; \
125ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
126ac7a638eSSatish Balay         else              low  = t; \
127ac7a638eSSatish Balay       } \
128ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
129ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
130ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
131ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
133ac7a638eSSatish Balay           else                    *bap  = value;  \
134ac7a638eSSatish Balay           goto b_noinsert; \
135ac7a638eSSatish Balay         } \
136ac7a638eSSatish Balay       } \
13789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
138a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139ac7a638eSSatish Balay       if (nrow >= rmax) { \
140ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14174c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142ac7a638eSSatish Balay         Scalar *new_a; \
143ac7a638eSSatish Balay  \
144a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
14589280ab3SLois Curfman McInnes  \
146ac7a638eSSatish Balay         /* malloc new storage space */ \
14774c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
150ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
151ac7a638eSSatish Balay  \
152ac7a638eSSatish Balay         /* copy over old data into new slots */ \
153ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15474c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158ac7a638eSSatish Balay                                                            len*sizeof(int)); \
159ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163ac7a638eSSatish Balay         /* free up old matrix storage */ \
16474c639caSSatish Balay         PetscFree(b->a);  \
16574c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16774c639caSSatish Balay         b->singlemalloc = 1; \
168ac7a638eSSatish Balay  \
169ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17174c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17374c639caSSatish Balay         b->reallocs++; \
17474c639caSSatish Balay         b->nz++; \
175ac7a638eSSatish Balay       } \
176ac7a638eSSatish Balay       N = nrow++ - 1;  \
177ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
178ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
179ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
180ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
184ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
185ac7a638eSSatish Balay       b_noinsert:; \
186ac7a638eSSatish Balay     bilen[brow] = nrow; \
187ac7a638eSSatish Balay }
188ac7a638eSSatish Balay 
1895615d1e5SSatish Balay #undef __FUNC__
1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   Scalar      value;
1954fa0d573SSatish Balay   int         ierr,i,j,row,col;
1964fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1974fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1984fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19957b952d6SSatish Balay 
200eada6651SSatish Balay   /* Some Variables required in the macro */
20180c1aa95SSatish Balay   Mat         A = baij->A;
20280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204ac7a638eSSatish Balay   Scalar      *aa=a->a;
205ac7a638eSSatish Balay 
206ac7a638eSSatish Balay   Mat         B = baij->B;
207ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209ac7a638eSSatish Balay   Scalar      *ba=b->a;
210ac7a638eSSatish Balay 
211ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
213ac7a638eSSatish Balay   Scalar      *ap,*bap;
21480c1aa95SSatish Balay 
215d64ed03dSBarry Smith   PetscFunctionBegin;
21657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
218a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220639f9d9dSBarry Smith #endif
22157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22257b952d6SSatish Balay       row = im[i] - rstart_orig;
22357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22557b952d6SSatish Balay           col = in[j] - cstart_orig;
22657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22957b952d6SSatish Balay         }
2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
231a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233639f9d9dSBarry Smith #endif
23457b952d6SSatish Balay         else {
23557b952d6SSatish Balay           if (mat->was_assembled) {
236905e6a2fSBarry Smith             if (!baij->colmap) {
237905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238905e6a2fSBarry Smith             }
239905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
24057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24257b952d6SSatish Balay               col =  in[j];
2439bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2449bf004c3SSatish Balay               B = baij->B;
2459bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2469bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2479bf004c3SSatish Balay               ba=b->a;
24857b952d6SSatish Balay             }
249d64ed03dSBarry Smith           } else col = in[j];
25057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
255d64ed03dSBarry Smith     } else {
25690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258d64ed03dSBarry Smith       } else {
25990f02eecSBarry Smith         if (!baij->donotstash) {
26057b952d6SSatish Balay           row = im[i];
26157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26357b952d6SSatish Balay           }
26457b952d6SSatish Balay         }
26557b952d6SSatish Balay       }
26657b952d6SSatish Balay     }
26790f02eecSBarry Smith   }
2683a40ed3dSBarry Smith   PetscFunctionReturn(0);
26957b952d6SSatish Balay }
27057b952d6SSatish Balay 
271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272ab26458aSBarry Smith #undef __FUNC__
273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275ab26458aSBarry Smith {
276ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
278abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
279ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
281ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282ab26458aSBarry Smith 
28330793edcSSatish Balay   if(!barray) {
28447513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
287ab26458aSBarry Smith   if (roworiented) {
288ab26458aSBarry Smith     stepval = (n-1)*bs;
289ab26458aSBarry Smith   } else {
290ab26458aSBarry Smith     stepval = (m-1)*bs;
291ab26458aSBarry Smith   }
292ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
294a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296ab26458aSBarry Smith #endif
297ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
298ab26458aSBarry Smith       row = im[i] - rstart;
299ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30215b57d14SSatish Balay           barray = v + i*bs2;
30315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30415b57d14SSatish Balay           barray = v + j*bs2;
30515b57d14SSatish Balay         } else { /* Here a copy is required */
306ab26458aSBarry Smith           if (roworiented) {
307ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
308ab26458aSBarry Smith           } else {
309ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
310abef11f7SSatish Balay           }
31147513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31247513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31330793edcSSatish Balay               *barray++  = *value++;
31447513183SBarry Smith             }
31547513183SBarry Smith           }
31630793edcSSatish Balay           barray -=bs2;
31715b57d14SSatish Balay         }
318abef11f7SSatish Balay 
319abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
320abef11f7SSatish Balay           col  = in[j] - cstart;
32130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322ab26458aSBarry Smith         }
3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
324a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326ab26458aSBarry Smith #endif
327ab26458aSBarry Smith         else {
328ab26458aSBarry Smith           if (mat->was_assembled) {
329ab26458aSBarry Smith             if (!baij->colmap) {
330ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331ab26458aSBarry Smith             }
332a5eb4965SSatish Balay 
3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
334a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335a5eb4965SSatish Balay #endif
336a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
337ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339ab26458aSBarry Smith               col =  in[j];
340ab26458aSBarry Smith             }
341ab26458aSBarry Smith           }
342ab26458aSBarry Smith           else col = in[j];
34330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344ab26458aSBarry Smith         }
345ab26458aSBarry Smith       }
346d64ed03dSBarry Smith     } else {
347ab26458aSBarry Smith       if (!baij->donotstash) {
348ab26458aSBarry Smith         if (roworiented ) {
349abef11f7SSatish Balay           row   = im[i]*bs;
350abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
351abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
352abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
353abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355abef11f7SSatish Balay               }
356ab26458aSBarry Smith             }
357ab26458aSBarry Smith           }
358d64ed03dSBarry Smith         } else {
359ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
360abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
361abef11f7SSatish Balay             col   = in[j]*bs;
362abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
363abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365ab26458aSBarry Smith               }
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith           }
368abef11f7SSatish Balay         }
369abef11f7SSatish Balay       }
370ab26458aSBarry Smith     }
371ab26458aSBarry Smith   }
3723a40ed3dSBarry Smith   PetscFunctionReturn(0);
373ab26458aSBarry Smith }
374ab26458aSBarry Smith 
3755615d1e5SSatish Balay #undef __FUNC__
3765615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
377ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
378d6de1c52SSatish Balay {
379d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
380d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
381d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
382d6de1c52SSatish Balay 
383d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
384a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
385a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
386d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
387d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
388d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
389a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
390a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
391d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
392d6de1c52SSatish Balay           col = idxn[j] - bscstart;
393d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
394d64ed03dSBarry Smith         } else {
395905e6a2fSBarry Smith           if (!baij->colmap) {
396905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
397905e6a2fSBarry Smith           }
398e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
399dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
400d9d09a02SSatish Balay           else {
401dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
402d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
403d6de1c52SSatish Balay           }
404d6de1c52SSatish Balay         }
405d6de1c52SSatish Balay       }
406d64ed03dSBarry Smith     } else {
407a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
408d6de1c52SSatish Balay     }
409d6de1c52SSatish Balay   }
4103a40ed3dSBarry Smith   PetscFunctionReturn(0);
411d6de1c52SSatish Balay }
412d6de1c52SSatish Balay 
4135615d1e5SSatish Balay #undef __FUNC__
4145615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
415ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
416d6de1c52SSatish Balay {
417d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
418d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
419acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
420d6de1c52SSatish Balay   double     sum = 0.0;
421d6de1c52SSatish Balay   Scalar     *v;
422d6de1c52SSatish Balay 
423d64ed03dSBarry Smith   PetscFunctionBegin;
424d6de1c52SSatish Balay   if (baij->size == 1) {
425d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
426d6de1c52SSatish Balay   } else {
427d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
428d6de1c52SSatish Balay       v = amat->a;
429d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
4303a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
431d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
432d6de1c52SSatish Balay #else
433d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
434d6de1c52SSatish Balay #endif
435d6de1c52SSatish Balay       }
436d6de1c52SSatish Balay       v = bmat->a;
437d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
4383a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
439d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
440d6de1c52SSatish Balay #else
441d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
442d6de1c52SSatish Balay #endif
443d6de1c52SSatish Balay       }
444ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
445d6de1c52SSatish Balay       *norm = sqrt(*norm);
446d64ed03dSBarry Smith     } else {
447e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
448d6de1c52SSatish Balay     }
449d64ed03dSBarry Smith   }
4503a40ed3dSBarry Smith   PetscFunctionReturn(0);
451d6de1c52SSatish Balay }
45257b952d6SSatish Balay 
4535615d1e5SSatish Balay #undef __FUNC__
4545615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
455ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
45657b952d6SSatish Balay {
45757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
45857b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
45957b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
46057b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
46157b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
46257b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46357b952d6SSatish Balay   InsertMode  addv;
46457b952d6SSatish Balay   Scalar      *rvalues,*svalues;
46557b952d6SSatish Balay 
466d64ed03dSBarry Smith   PetscFunctionBegin;
46757b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
468ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
46957b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
470a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
47157b952d6SSatish Balay   }
472e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
47357b952d6SSatish Balay 
47457b952d6SSatish Balay   /*  first count number of contributors to each processor */
47557b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
47657b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
47757b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
47857b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
47957b952d6SSatish Balay     idx = baij->stash.idx[i];
48057b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
48157b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
48257b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
48357b952d6SSatish Balay       }
48457b952d6SSatish Balay     }
48557b952d6SSatish Balay   }
48657b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
48757b952d6SSatish Balay 
48857b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
48957b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
490ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
49157b952d6SSatish Balay   nreceives = work[rank];
492ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
49357b952d6SSatish Balay   nmax      = work[rank];
49457b952d6SSatish Balay   PetscFree(work);
49557b952d6SSatish Balay 
49657b952d6SSatish Balay   /* post receives:
49757b952d6SSatish Balay        1) each message will consist of ordered pairs
49857b952d6SSatish Balay      (global index,value) we store the global index as a double
49957b952d6SSatish Balay      to simplify the message passing.
50057b952d6SSatish Balay        2) since we don't know how long each individual message is we
50157b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
50257b952d6SSatish Balay      this is a lot of wasted space.
50357b952d6SSatish Balay 
50457b952d6SSatish Balay 
50557b952d6SSatish Balay        This could be done better.
50657b952d6SSatish Balay   */
507f8abc2e8SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
508f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
50957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
510ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
51157b952d6SSatish Balay   }
51257b952d6SSatish Balay 
51357b952d6SSatish Balay   /* do sends:
51457b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
51557b952d6SSatish Balay          the ith processor
51657b952d6SSatish Balay   */
51757b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
518d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
51957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
52057b952d6SSatish Balay   starts[0] = 0;
52157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
52257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
52357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
52457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
52557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
52657b952d6SSatish Balay   }
52757b952d6SSatish Balay   PetscFree(owner);
52857b952d6SSatish Balay   starts[0] = 0;
52957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
53057b952d6SSatish Balay   count = 0;
53157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
53257b952d6SSatish Balay     if (procs[i]) {
533ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
53457b952d6SSatish Balay     }
53557b952d6SSatish Balay   }
53657b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
53757b952d6SSatish Balay 
53857b952d6SSatish Balay   /* Free cache space */
539cd1fa5fbSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n);
54057b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
54157b952d6SSatish Balay 
54257b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
54357b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
54457b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
54557b952d6SSatish Balay   baij->rmax       = nmax;
54657b952d6SSatish Balay 
5473a40ed3dSBarry Smith   PetscFunctionReturn(0);
54857b952d6SSatish Balay }
549596b8d2eSBarry Smith #include <math.h>
550596b8d2eSBarry Smith #define HASH_KEY 0.6180339887
551bd7f49f5SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1)))
552bd7f49f5SSatish Balay #define HASH2(size,key) ((int)((size)*fmod(((key+0.5)*HASH_KEY),1)))
55357b952d6SSatish Balay 
554bd7f49f5SSatish Balay 
555bd7f49f5SSatish Balay int CreateHashTable(Mat mat,double factor)
556596b8d2eSBarry Smith {
557596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
558596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
559596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
560bd7f49f5SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,h2,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
561bd7f49f5SSatish Balay   int         size=(int)(factor*nz),ct=0,max1=0,max2=0;
562*a07cd24cSSatish Balay   double      *HT,key;
563*a07cd24cSSatish Balay   extern int  PetscGlobalRank;
564*a07cd24cSSatish Balay   /*
565*a07cd24cSSatish Balay      Scalar      *aa=a->a,*ba=b->a;
566596b8d2eSBarry Smith      static double *HT;
567596b8d2eSBarry Smith      static      int flag=1;
568*a07cd24cSSatish Balay      */
569d64ed03dSBarry Smith 
570d64ed03dSBarry Smith   PetscFunctionBegin;
571596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
572*a07cd24cSSatish Balay   if (!baij->ht) {
573*a07cd24cSSatish Balay     baij->ht = (double*)PetscMalloc(size*sizeof(double)); CHKPTRQ(baij->ht);
574596b8d2eSBarry Smith   }
575*a07cd24cSSatish Balay   HT = baij->ht;
576596b8d2eSBarry Smith   PetscMemzero(HT,size*sizeof(double));
577596b8d2eSBarry Smith 
578596b8d2eSBarry Smith   /* Loop Over A */
579596b8d2eSBarry Smith   for ( i=0; i<a->n; i++ ) {
580596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
581596b8d2eSBarry Smith       key = i*baij->n+aj[j]+1;
582596b8d2eSBarry Smith       h1  = HASH1(size,key);
583bd7f49f5SSatish Balay       h2  = HASH2(size,key);
584596b8d2eSBarry Smith 
585596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
586596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
587596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
588596b8d2eSBarry Smith           break;
589596b8d2eSBarry Smith         } else ct++;
590596b8d2eSBarry Smith       }
591bd7f49f5SSatish Balay       if (k> max1) max1 = k;
592596b8d2eSBarry Smith     }
593596b8d2eSBarry Smith   }
594596b8d2eSBarry Smith   /* Loop Over B */
595596b8d2eSBarry Smith   for ( i=0; i<b->n; i++ ) {
596596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
597596b8d2eSBarry Smith       key = i*b->n+bj[j]+1;
598596b8d2eSBarry Smith       h1  = HASH1(size,key);
599596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
600596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
601596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
602596b8d2eSBarry Smith           break;
603596b8d2eSBarry Smith         } else ct++;
604596b8d2eSBarry Smith       }
605bd7f49f5SSatish Balay       if (k> max2) max2 = k;
606596b8d2eSBarry Smith     }
607596b8d2eSBarry Smith   }
608596b8d2eSBarry Smith 
609596b8d2eSBarry Smith   /* Print Summary */
610596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
611596b8d2eSBarry Smith     if (HT[i]) {j++;}
612596b8d2eSBarry Smith 
613bd7f49f5SSatish Balay   printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n",
614bd7f49f5SSatish Balay          PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j)/j,j);
615bd7f49f5SSatish Balay   PetscFree(HT);
6163a40ed3dSBarry Smith   PetscFunctionReturn(0);
617596b8d2eSBarry Smith }
61857b952d6SSatish Balay 
6195615d1e5SSatish Balay #undef __FUNC__
6205615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
621ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
62257b952d6SSatish Balay {
62357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
62457b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
62557b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
626596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
62757b952d6SSatish Balay   Scalar      *values,val;
628e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
62957b952d6SSatish Balay 
630d64ed03dSBarry Smith   PetscFunctionBegin;
63157b952d6SSatish Balay   /*  wait on receives */
63257b952d6SSatish Balay   while (count) {
633ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
63457b952d6SSatish Balay     /* unpack receives into our local space */
63557b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
636ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
63757b952d6SSatish Balay     n = n/3;
63857b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
63957b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
64057b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
64157b952d6SSatish Balay       val = values[3*i+2];
64257b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
64357b952d6SSatish Balay         col -= baij->cstart*bs;
6446fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
645d64ed03dSBarry Smith       } else {
64657b952d6SSatish Balay         if (mat->was_assembled) {
647905e6a2fSBarry Smith           if (!baij->colmap) {
648905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
649905e6a2fSBarry Smith           }
650a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
65157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
65257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
65457b952d6SSatish Balay           }
65557b952d6SSatish Balay         }
6566fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
65757b952d6SSatish Balay       }
65857b952d6SSatish Balay     }
65957b952d6SSatish Balay     count--;
66057b952d6SSatish Balay   }
66157b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
66257b952d6SSatish Balay 
66357b952d6SSatish Balay   /* wait on sends */
66457b952d6SSatish Balay   if (baij->nsends) {
665d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
666ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
66757b952d6SSatish Balay     PetscFree(send_status);
66857b952d6SSatish Balay   }
66957b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
67057b952d6SSatish Balay 
67157b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
67257b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
67357b952d6SSatish Balay 
67457b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
67557b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
676ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
67757b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
67857b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
67957b952d6SSatish Balay   }
68057b952d6SSatish Balay 
6816d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
68257b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
68357b952d6SSatish Balay   }
68457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
68557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
68657b952d6SSatish Balay 
687596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
688bd7f49f5SSatish Balay   if (flg) {
689*a07cd24cSSatish Balay     double fact = 1.39;
690*a07cd24cSSatish Balay     /* for ( fact=1.2; fact<2.0; fact +=0.05) */ CreateHashTable(mat,fact);
691bd7f49f5SSatish Balay   }
69257b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
6933a40ed3dSBarry Smith   PetscFunctionReturn(0);
69457b952d6SSatish Balay }
69557b952d6SSatish Balay 
6965615d1e5SSatish Balay #undef __FUNC__
6975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
69857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
69957b952d6SSatish Balay {
70057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
70157b952d6SSatish Balay   int          ierr;
70257b952d6SSatish Balay 
703d64ed03dSBarry Smith   PetscFunctionBegin;
70457b952d6SSatish Balay   if (baij->size == 1) {
70557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
706a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
7073a40ed3dSBarry Smith   PetscFunctionReturn(0);
70857b952d6SSatish Balay }
70957b952d6SSatish Balay 
7105615d1e5SSatish Balay #undef __FUNC__
7115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
71257b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
71357b952d6SSatish Balay {
71457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
715cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
71657b952d6SSatish Balay   FILE         *fd;
71757b952d6SSatish Balay   ViewerType   vtype;
71857b952d6SSatish Balay 
719d64ed03dSBarry Smith   PetscFunctionBegin;
72057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
72157b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
72257b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
723639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7244e220ebcSLois Curfman McInnes       MatInfo info;
72557b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
72657b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7274e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
72857b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
72957b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7304e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7314e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7324e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7334e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7344e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7354e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
73657b952d6SSatish Balay       fflush(fd);
73757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
73857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
7393a40ed3dSBarry Smith       PetscFunctionReturn(0);
740d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
741bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
7423a40ed3dSBarry Smith       PetscFunctionReturn(0);
74357b952d6SSatish Balay     }
74457b952d6SSatish Balay   }
74557b952d6SSatish Balay 
74657b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
74757b952d6SSatish Balay     Draw       draw;
74857b952d6SSatish Balay     PetscTruth isnull;
74957b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
7503a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
75157b952d6SSatish Balay   }
75257b952d6SSatish Balay 
75357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
75457b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
75557b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
75657b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
75757b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
75857b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
75957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
76057b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
76157b952d6SSatish Balay     fflush(fd);
76257b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
763d64ed03dSBarry Smith   } else {
76457b952d6SSatish Balay     int size = baij->size;
76557b952d6SSatish Balay     rank = baij->rank;
76657b952d6SSatish Balay     if (size == 1) {
76757b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
768d64ed03dSBarry Smith     } else {
76957b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
77057b952d6SSatish Balay       Mat         A;
77157b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
77257b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
77357b952d6SSatish Balay       int         mbs=baij->mbs;
77457b952d6SSatish Balay       Scalar      *a;
77557b952d6SSatish Balay 
77657b952d6SSatish Balay       if (!rank) {
77755843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
778d64ed03dSBarry Smith       } else {
77955843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
78057b952d6SSatish Balay       }
78157b952d6SSatish Balay       PLogObjectParent(mat,A);
78257b952d6SSatish Balay 
78357b952d6SSatish Balay       /* copy over the A part */
78457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
78557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
78657b952d6SSatish Balay       row = baij->rstart;
78757b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
78857b952d6SSatish Balay 
78957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
79057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
79157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
79257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
79357b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
79457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
795cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
796cee3aa6bSSatish Balay             col++; a += bs;
79757b952d6SSatish Balay           }
79857b952d6SSatish Balay         }
79957b952d6SSatish Balay       }
80057b952d6SSatish Balay       /* copy over the B part */
80157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
80257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
80357b952d6SSatish Balay       row = baij->rstart*bs;
80457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
80557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
80657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
80757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
80857b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
80957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
810cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
811cee3aa6bSSatish Balay             col++; a += bs;
81257b952d6SSatish Balay           }
81357b952d6SSatish Balay         }
81457b952d6SSatish Balay       }
81557b952d6SSatish Balay       PetscFree(rvals);
8166d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8176d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
81855843e3eSBarry Smith       /*
81955843e3eSBarry Smith          Everyone has to call to draw the matrix since the graphics waits are
82055843e3eSBarry Smith          synchronized across all processors that share the Draw object
82155843e3eSBarry Smith       */
82255843e3eSBarry Smith       if (!rank || vtype == DRAW_VIEWER) {
82357b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
82457b952d6SSatish Balay       }
82557b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
82657b952d6SSatish Balay     }
82757b952d6SSatish Balay   }
8283a40ed3dSBarry Smith   PetscFunctionReturn(0);
82957b952d6SSatish Balay }
83057b952d6SSatish Balay 
83157b952d6SSatish Balay 
83257b952d6SSatish Balay 
8335615d1e5SSatish Balay #undef __FUNC__
8345615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
835ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
83657b952d6SSatish Balay {
83757b952d6SSatish Balay   Mat         mat = (Mat) obj;
83857b952d6SSatish Balay   int         ierr;
83957b952d6SSatish Balay   ViewerType  vtype;
84057b952d6SSatish Balay 
841d64ed03dSBarry Smith   PetscFunctionBegin;
84257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
84357b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
84457b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
84557b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
8463a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
8473a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
84857b952d6SSatish Balay   }
8493a40ed3dSBarry Smith   PetscFunctionReturn(0);
85057b952d6SSatish Balay }
85157b952d6SSatish Balay 
8525615d1e5SSatish Balay #undef __FUNC__
8535615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
854ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
85579bdfe76SSatish Balay {
85679bdfe76SSatish Balay   Mat         mat = (Mat) obj;
85779bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
85879bdfe76SSatish Balay   int         ierr;
85979bdfe76SSatish Balay 
860d64ed03dSBarry Smith   PetscFunctionBegin;
8613a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
86279bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
86379bdfe76SSatish Balay #endif
86479bdfe76SSatish Balay 
86583e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
86679bdfe76SSatish Balay   PetscFree(baij->rowners);
86779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
86879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
86979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
87079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
87179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
87279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
87379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
87430793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
87579bdfe76SSatish Balay   PetscFree(baij);
87679bdfe76SSatish Balay   PLogObjectDestroy(mat);
87779bdfe76SSatish Balay   PetscHeaderDestroy(mat);
8783a40ed3dSBarry Smith   PetscFunctionReturn(0);
87979bdfe76SSatish Balay }
88079bdfe76SSatish Balay 
8815615d1e5SSatish Balay #undef __FUNC__
8825615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
883ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
884cee3aa6bSSatish Balay {
885cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
88647b4a8eaSLois Curfman McInnes   int         ierr, nt;
887cee3aa6bSSatish Balay 
888d64ed03dSBarry Smith   PetscFunctionBegin;
889c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
89047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
891a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
89247b4a8eaSLois Curfman McInnes   }
893c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
89447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
895a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
89647b4a8eaSLois Curfman McInnes   }
89743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
898cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
89943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
900cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
90143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
9023a40ed3dSBarry Smith   PetscFunctionReturn(0);
903cee3aa6bSSatish Balay }
904cee3aa6bSSatish Balay 
9055615d1e5SSatish Balay #undef __FUNC__
9065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
907ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
908cee3aa6bSSatish Balay {
909cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
910cee3aa6bSSatish Balay   int        ierr;
911d64ed03dSBarry Smith 
912d64ed03dSBarry Smith   PetscFunctionBegin;
91343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
914cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
91543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
9173a40ed3dSBarry Smith   PetscFunctionReturn(0);
918cee3aa6bSSatish Balay }
919cee3aa6bSSatish Balay 
9205615d1e5SSatish Balay #undef __FUNC__
9215615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
922ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
923cee3aa6bSSatish Balay {
924cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
925cee3aa6bSSatish Balay   int         ierr;
926cee3aa6bSSatish Balay 
927d64ed03dSBarry Smith   PetscFunctionBegin;
928cee3aa6bSSatish Balay   /* do nondiagonal part */
929cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
930cee3aa6bSSatish Balay   /* send it on its way */
931537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
932cee3aa6bSSatish Balay   /* do local part */
933cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
934cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
935cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
936cee3aa6bSSatish Balay   /* but is not perhaps always true. */
937639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
9383a40ed3dSBarry Smith   PetscFunctionReturn(0);
939cee3aa6bSSatish Balay }
940cee3aa6bSSatish Balay 
9415615d1e5SSatish Balay #undef __FUNC__
9425615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
943ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
944cee3aa6bSSatish Balay {
945cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
946cee3aa6bSSatish Balay   int         ierr;
947cee3aa6bSSatish Balay 
948d64ed03dSBarry Smith   PetscFunctionBegin;
949cee3aa6bSSatish Balay   /* do nondiagonal part */
950cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
951cee3aa6bSSatish Balay   /* send it on its way */
952537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
953cee3aa6bSSatish Balay   /* do local part */
954cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
955cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
956cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
957cee3aa6bSSatish Balay   /* but is not perhaps always true. */
958537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
9593a40ed3dSBarry Smith   PetscFunctionReturn(0);
960cee3aa6bSSatish Balay }
961cee3aa6bSSatish Balay 
962cee3aa6bSSatish Balay /*
963cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
964cee3aa6bSSatish Balay    diagonal block
965cee3aa6bSSatish Balay */
9665615d1e5SSatish Balay #undef __FUNC__
9675615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
968ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
969cee3aa6bSSatish Balay {
970cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
9713a40ed3dSBarry Smith   int         ierr;
972d64ed03dSBarry Smith 
973d64ed03dSBarry Smith   PetscFunctionBegin;
974a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
9753a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
9763a40ed3dSBarry Smith   PetscFunctionReturn(0);
977cee3aa6bSSatish Balay }
978cee3aa6bSSatish Balay 
9795615d1e5SSatish Balay #undef __FUNC__
9805615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
981ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
982cee3aa6bSSatish Balay {
983cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
984cee3aa6bSSatish Balay   int         ierr;
985d64ed03dSBarry Smith 
986d64ed03dSBarry Smith   PetscFunctionBegin;
987cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
988cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
9893a40ed3dSBarry Smith   PetscFunctionReturn(0);
990cee3aa6bSSatish Balay }
991026e39d0SSatish Balay 
9925615d1e5SSatish Balay #undef __FUNC__
9935615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
994ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
99557b952d6SSatish Balay {
99657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
997d64ed03dSBarry Smith 
998d64ed03dSBarry Smith   PetscFunctionBegin;
999bd7f49f5SSatish Balay   if (m) *m = mat->M;
1000bd7f49f5SSatish Balay   if (n) *n = mat->N;
10013a40ed3dSBarry Smith   PetscFunctionReturn(0);
100257b952d6SSatish Balay }
100357b952d6SSatish Balay 
10045615d1e5SSatish Balay #undef __FUNC__
10055615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1006ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
100757b952d6SSatish Balay {
100857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1009d64ed03dSBarry Smith 
1010d64ed03dSBarry Smith   PetscFunctionBegin;
101157b952d6SSatish Balay   *m = mat->m; *n = mat->N;
10123a40ed3dSBarry Smith   PetscFunctionReturn(0);
101357b952d6SSatish Balay }
101457b952d6SSatish Balay 
10155615d1e5SSatish Balay #undef __FUNC__
10165615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1017ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
101857b952d6SSatish Balay {
101957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1020d64ed03dSBarry Smith 
1021d64ed03dSBarry Smith   PetscFunctionBegin;
102257b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
10233a40ed3dSBarry Smith   PetscFunctionReturn(0);
102457b952d6SSatish Balay }
102557b952d6SSatish Balay 
1026acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1027acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1028acdf5bf4SSatish Balay 
10295615d1e5SSatish Balay #undef __FUNC__
10305615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1031acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1032acdf5bf4SSatish Balay {
1033acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1034acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1035acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1036d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1037d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1038acdf5bf4SSatish Balay 
1039d64ed03dSBarry Smith   PetscFunctionBegin;
1040a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1041acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1042acdf5bf4SSatish Balay 
1043acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1044acdf5bf4SSatish Balay     /*
1045acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1046acdf5bf4SSatish Balay     */
1047acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1048bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1049bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1050acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1051acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1052acdf5bf4SSatish Balay     }
1053acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1054acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1055acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1056acdf5bf4SSatish Balay   }
1057acdf5bf4SSatish Balay 
1058a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1059d9d09a02SSatish Balay   lrow = row - brstart;
1060acdf5bf4SSatish Balay 
1061acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1062acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1063acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1064acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1065acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1066acdf5bf4SSatish Balay   nztot = nzA + nzB;
1067acdf5bf4SSatish Balay 
1068acdf5bf4SSatish Balay   cmap  = mat->garray;
1069acdf5bf4SSatish Balay   if (v  || idx) {
1070acdf5bf4SSatish Balay     if (nztot) {
1071acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1072acdf5bf4SSatish Balay       int imark = -1;
1073acdf5bf4SSatish Balay       if (v) {
1074acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1075acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1076d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1077acdf5bf4SSatish Balay           else break;
1078acdf5bf4SSatish Balay         }
1079acdf5bf4SSatish Balay         imark = i;
1080acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1081acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1082acdf5bf4SSatish Balay       }
1083acdf5bf4SSatish Balay       if (idx) {
1084acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1085acdf5bf4SSatish Balay         if (imark > -1) {
1086acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1087bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1088acdf5bf4SSatish Balay           }
1089acdf5bf4SSatish Balay         } else {
1090acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1091d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1092d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1093acdf5bf4SSatish Balay             else break;
1094acdf5bf4SSatish Balay           }
1095acdf5bf4SSatish Balay           imark = i;
1096acdf5bf4SSatish Balay         }
1097d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1098d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1099acdf5bf4SSatish Balay       }
1100d64ed03dSBarry Smith     } else {
1101d212a18eSSatish Balay       if (idx) *idx = 0;
1102d212a18eSSatish Balay       if (v)   *v   = 0;
1103d212a18eSSatish Balay     }
1104acdf5bf4SSatish Balay   }
1105acdf5bf4SSatish Balay   *nz = nztot;
1106acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1107acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
11083a40ed3dSBarry Smith   PetscFunctionReturn(0);
1109acdf5bf4SSatish Balay }
1110acdf5bf4SSatish Balay 
11115615d1e5SSatish Balay #undef __FUNC__
11125615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1113acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1114acdf5bf4SSatish Balay {
1115acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1116d64ed03dSBarry Smith 
1117d64ed03dSBarry Smith   PetscFunctionBegin;
1118acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1119a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1120acdf5bf4SSatish Balay   }
1121acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
11223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1123acdf5bf4SSatish Balay }
1124acdf5bf4SSatish Balay 
11255615d1e5SSatish Balay #undef __FUNC__
11265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1127ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
11285a838052SSatish Balay {
11295a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1130d64ed03dSBarry Smith 
1131d64ed03dSBarry Smith   PetscFunctionBegin;
11325a838052SSatish Balay   *bs = baij->bs;
11333a40ed3dSBarry Smith   PetscFunctionReturn(0);
11345a838052SSatish Balay }
11355a838052SSatish Balay 
11365615d1e5SSatish Balay #undef __FUNC__
11375615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1138ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
113958667388SSatish Balay {
114058667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
114158667388SSatish Balay   int         ierr;
1142d64ed03dSBarry Smith 
1143d64ed03dSBarry Smith   PetscFunctionBegin;
114458667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
114558667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
11463a40ed3dSBarry Smith   PetscFunctionReturn(0);
114758667388SSatish Balay }
11480ac07820SSatish Balay 
11495615d1e5SSatish Balay #undef __FUNC__
11505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1151ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11520ac07820SSatish Balay {
11534e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11544e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11557d57db60SLois Curfman McInnes   int         ierr;
11567d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11570ac07820SSatish Balay 
1158d64ed03dSBarry Smith   PetscFunctionBegin;
11594e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11604e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11614e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11624e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11634e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11640ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11654e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11664e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11674e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11684e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11694e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11700ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1171ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
11724e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11734e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11744e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11754e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11764e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11770ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1178ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
11794e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11804e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11814e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11824e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11834e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11840ac07820SSatish Balay   }
11855a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
11865a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
11875a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
11885a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
11894e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11904e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11914e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11923a40ed3dSBarry Smith   PetscFunctionReturn(0);
11930ac07820SSatish Balay }
11940ac07820SSatish Balay 
11955615d1e5SSatish Balay #undef __FUNC__
11965615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1197ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
119858667388SSatish Balay {
119958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
120058667388SSatish Balay 
1201d64ed03dSBarry Smith   PetscFunctionBegin;
120258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
120358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
12046da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1205c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
120696854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1207c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1208b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1209b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1210b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1211aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
121258667388SSatish Balay         MatSetOption(a->A,op);
121358667388SSatish Balay         MatSetOption(a->B,op);
1214b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
12156da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
121658667388SSatish Balay              op == MAT_SYMMETRIC ||
121758667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
121858667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
121958667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
122058667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
122158667388SSatish Balay     a->roworiented = 0;
122258667388SSatish Balay     MatSetOption(a->A,op);
122358667388SSatish Balay     MatSetOption(a->B,op);
12242b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
122590f02eecSBarry Smith     a->donotstash = 1;
1226d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1227d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1228d64ed03dSBarry Smith   } else {
1229d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1230d64ed03dSBarry Smith   }
12313a40ed3dSBarry Smith   PetscFunctionReturn(0);
123258667388SSatish Balay }
123358667388SSatish Balay 
12345615d1e5SSatish Balay #undef __FUNC__
12355615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1236ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
12370ac07820SSatish Balay {
12380ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
12390ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
12400ac07820SSatish Balay   Mat        B;
12410ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
12420ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
12430ac07820SSatish Balay   Scalar     *a;
12440ac07820SSatish Balay 
1245d64ed03dSBarry Smith   PetscFunctionBegin;
1246a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
12470ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
12480ac07820SSatish Balay   CHKERRQ(ierr);
12490ac07820SSatish Balay 
12500ac07820SSatish Balay   /* copy over the A part */
12510ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12520ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12530ac07820SSatish Balay   row = baij->rstart;
12540ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12550ac07820SSatish Balay 
12560ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12570ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12580ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12590ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12600ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12610ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12620ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12630ac07820SSatish Balay         col++; a += bs;
12640ac07820SSatish Balay       }
12650ac07820SSatish Balay     }
12660ac07820SSatish Balay   }
12670ac07820SSatish Balay   /* copy over the B part */
12680ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12690ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12700ac07820SSatish Balay   row = baij->rstart*bs;
12710ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12720ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12730ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12740ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12750ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12760ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12770ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12780ac07820SSatish Balay         col++; a += bs;
12790ac07820SSatish Balay       }
12800ac07820SSatish Balay     }
12810ac07820SSatish Balay   }
12820ac07820SSatish Balay   PetscFree(rvals);
12830ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12840ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12850ac07820SSatish Balay 
12860ac07820SSatish Balay   if (matout != PETSC_NULL) {
12870ac07820SSatish Balay     *matout = B;
12880ac07820SSatish Balay   } else {
12890ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12900ac07820SSatish Balay     PetscFree(baij->rowners);
12910ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12920ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12930ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12940ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12950ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12960ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12970ac07820SSatish Balay     PetscFree(baij);
1298f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12990ac07820SSatish Balay     PetscHeaderDestroy(B);
13000ac07820SSatish Balay   }
13013a40ed3dSBarry Smith   PetscFunctionReturn(0);
13020ac07820SSatish Balay }
13030e95ebc0SSatish Balay 
13045615d1e5SSatish Balay #undef __FUNC__
13055615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
13060e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
13070e95ebc0SSatish Balay {
13080e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
13090e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
13100e95ebc0SSatish Balay   int ierr,s1,s2,s3;
13110e95ebc0SSatish Balay 
1312d64ed03dSBarry Smith   PetscFunctionBegin;
13130e95ebc0SSatish Balay   if (ll)  {
13140e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
13150e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1316a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
13170e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
13180e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
13190e95ebc0SSatish Balay   }
1320a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
13213a40ed3dSBarry Smith   PetscFunctionReturn(0);
13220e95ebc0SSatish Balay }
13230e95ebc0SSatish Balay 
13245615d1e5SSatish Balay #undef __FUNC__
13255615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1326ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
13270ac07820SSatish Balay {
13280ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
13290ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1330*a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
13310ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
13320ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1333*a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
13340ac07820SSatish Balay   MPI_Comm       comm = A->comm;
13350ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
13360ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
13370ac07820SSatish Balay   IS             istmp;
13380ac07820SSatish Balay 
1339d64ed03dSBarry Smith   PetscFunctionBegin;
13400ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
13410ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
13420ac07820SSatish Balay 
13430ac07820SSatish Balay   /*  first count number of contributors to each processor */
13440ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
13450ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
13460ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
13470ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13480ac07820SSatish Balay     idx = rows[i];
13490ac07820SSatish Balay     found = 0;
13500ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13510ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13520ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13530ac07820SSatish Balay       }
13540ac07820SSatish Balay     }
1355a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
13560ac07820SSatish Balay   }
13570ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13580ac07820SSatish Balay 
13590ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13600ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1361ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
13620ac07820SSatish Balay   nrecvs = work[rank];
1363ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
13640ac07820SSatish Balay   nmax   = work[rank];
13650ac07820SSatish Balay   PetscFree(work);
13660ac07820SSatish Balay 
13670ac07820SSatish Balay   /* post receives:   */
1368d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1369d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
13700ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1371ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
13720ac07820SSatish Balay   }
13730ac07820SSatish Balay 
13740ac07820SSatish Balay   /* do sends:
13750ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
13760ac07820SSatish Balay      the ith processor
13770ac07820SSatish Balay   */
13780ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1379ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
13800ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13810ac07820SSatish Balay   starts[0] = 0;
13820ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13830ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13840ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13850ac07820SSatish Balay   }
13860ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13870ac07820SSatish Balay 
13880ac07820SSatish Balay   starts[0] = 0;
13890ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13900ac07820SSatish Balay   count = 0;
13910ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13920ac07820SSatish Balay     if (procs[i]) {
1393ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
13940ac07820SSatish Balay     }
13950ac07820SSatish Balay   }
13960ac07820SSatish Balay   PetscFree(starts);
13970ac07820SSatish Balay 
13980ac07820SSatish Balay   base = owners[rank]*bs;
13990ac07820SSatish Balay 
14000ac07820SSatish Balay   /*  wait on receives */
14010ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
14020ac07820SSatish Balay   source = lens + nrecvs;
14030ac07820SSatish Balay   count  = nrecvs; slen = 0;
14040ac07820SSatish Balay   while (count) {
1405ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
14060ac07820SSatish Balay     /* unpack receives into our local space */
1407ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
14080ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
14090ac07820SSatish Balay     lens[imdex]  = n;
14100ac07820SSatish Balay     slen += n;
14110ac07820SSatish Balay     count--;
14120ac07820SSatish Balay   }
14130ac07820SSatish Balay   PetscFree(recv_waits);
14140ac07820SSatish Balay 
14150ac07820SSatish Balay   /* move the data into the send scatter */
14160ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
14170ac07820SSatish Balay   count = 0;
14180ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
14190ac07820SSatish Balay     values = rvalues + i*nmax;
14200ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
14210ac07820SSatish Balay       lrows[count++] = values[j] - base;
14220ac07820SSatish Balay     }
14230ac07820SSatish Balay   }
14240ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
14250ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
14260ac07820SSatish Balay 
14270ac07820SSatish Balay   /* actually zap the local rows */
1428029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
14290ac07820SSatish Balay   PLogObjectParent(A,istmp);
1430*a07cd24cSSatish Balay 
1431*a07cd24cSSatish Balay   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
14320ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
14330ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
14340ac07820SSatish Balay 
1435*a07cd24cSSatish Balay   if (diag) {
1436*a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1437*a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1438*a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1439*a07cd24cSSatish Balay     }
1440*a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1441*a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1442*a07cd24cSSatish Balay   }
1443*a07cd24cSSatish Balay   PetscFree(lrows);
1444*a07cd24cSSatish Balay 
14450ac07820SSatish Balay   /* wait on sends */
14460ac07820SSatish Balay   if (nsends) {
1447d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1448ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
14490ac07820SSatish Balay     PetscFree(send_status);
14500ac07820SSatish Balay   }
14510ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
14520ac07820SSatish Balay 
14533a40ed3dSBarry Smith   PetscFunctionReturn(0);
14540ac07820SSatish Balay }
1455ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14565615d1e5SSatish Balay #undef __FUNC__
14575615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1458ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1459ba4ca20aSSatish Balay {
1460ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
14613a40ed3dSBarry Smith   int         ierr;
1462ba4ca20aSSatish Balay 
1463d64ed03dSBarry Smith   PetscFunctionBegin;
14643a40ed3dSBarry Smith   if (!a->rank) {
14653a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
14663a40ed3dSBarry Smith   }
14673a40ed3dSBarry Smith   PetscFunctionReturn(0);
1468ba4ca20aSSatish Balay }
14690ac07820SSatish Balay 
14705615d1e5SSatish Balay #undef __FUNC__
14715615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1472ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1473bb5a7306SBarry Smith {
1474bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1475bb5a7306SBarry Smith   int         ierr;
1476d64ed03dSBarry Smith 
1477d64ed03dSBarry Smith   PetscFunctionBegin;
1478bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
14793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1480bb5a7306SBarry Smith }
1481bb5a7306SBarry Smith 
14820ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14830ac07820SSatish Balay 
148479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
148579bdfe76SSatish Balay static struct _MatOps MatOps = {
1486bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14874c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14884c50302cSBarry Smith   0,0,0,0,
14890ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14900e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
149158667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14924c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14934c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14944c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
149594a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1496d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1497ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1498bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1499ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
150079bdfe76SSatish Balay 
150179bdfe76SSatish Balay 
15025615d1e5SSatish Balay #undef __FUNC__
15035615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
150479bdfe76SSatish Balay /*@C
150579bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
150679bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
150779bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
150879bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
150979bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
151079bdfe76SSatish Balay 
151179bdfe76SSatish Balay    Input Parameters:
151279bdfe76SSatish Balay .  comm - MPI communicator
151379bdfe76SSatish Balay .  bs   - size of blockk
151479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
151592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
151692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
151792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
151892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
151992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
152079bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
152192e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
152279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
152379bdfe76SSatish Balay            submatrix  (same for all local rows)
152492e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
152592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
152692e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
152792e8d321SLois Curfman McInnes            it is zero.
152892e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
152979bdfe76SSatish Balay            submatrix (same for all local rows).
153092e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
153192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
153292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
153379bdfe76SSatish Balay 
153479bdfe76SSatish Balay    Output Parameter:
153579bdfe76SSatish Balay .  A - the matrix
153679bdfe76SSatish Balay 
153779bdfe76SSatish Balay    Notes:
153879bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
153979bdfe76SSatish Balay    (possibly both).
154079bdfe76SSatish Balay 
154179bdfe76SSatish Balay    Storage Information:
154279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
154379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
154479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
154579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
154679bdfe76SSatish Balay 
154779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
154879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
154979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
155079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
155179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
155279bdfe76SSatish Balay 
155379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
155479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
155579bdfe76SSatish Balay 
155679bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
155779bdfe76SSatish Balay $         -------------------
155879bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
155979bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
156079bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
156179bdfe76SSatish Balay $         -------------------
156279bdfe76SSatish Balay $
156379bdfe76SSatish Balay 
156479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
156579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
156679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
156757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
156879bdfe76SSatish Balay 
1569d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1570d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
157179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
157292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
157392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15746da5968aSLois Curfman McInnes    matrices.
157579bdfe76SSatish Balay 
157692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
157779bdfe76SSatish Balay 
157879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
157979bdfe76SSatish Balay @*/
158079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
158179bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
158279bdfe76SSatish Balay {
158379bdfe76SSatish Balay   Mat          B;
158479bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15854c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
158679bdfe76SSatish Balay 
1587d64ed03dSBarry Smith   PetscFunctionBegin;
1588a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
15893914022bSBarry Smith 
15903914022bSBarry Smith   MPI_Comm_size(comm,&size);
15913914022bSBarry Smith   if (size == 1) {
15923914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
15933914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
15943914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
15953a40ed3dSBarry Smith     PetscFunctionReturn(0);
15963914022bSBarry Smith   }
15973914022bSBarry Smith 
159879bdfe76SSatish Balay   *A = 0;
1599d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
160079bdfe76SSatish Balay   PLogObjectCreate(B);
160179bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
160279bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
160379bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
16044c50302cSBarry Smith 
160579bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
160679bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
160790f02eecSBarry Smith   B->mapping    = 0;
160879bdfe76SSatish Balay   B->factor     = 0;
160979bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
161079bdfe76SSatish Balay 
1611e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
161279bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
161379bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
161479bdfe76SSatish Balay 
1615d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1616a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1617d64ed03dSBarry Smith   }
1618a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1619a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1620a8c6a408SBarry Smith   }
1621a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1622a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1623a8c6a408SBarry Smith   }
1624cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1625cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
162679bdfe76SSatish Balay 
162779bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
162879bdfe76SSatish Balay     work[0] = m; work[1] = n;
162979bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
1630ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
163179bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
163279bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
163379bdfe76SSatish Balay   }
163479bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
163579bdfe76SSatish Balay     Mbs = M/bs;
1636a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
163779bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
163879bdfe76SSatish Balay     m   = mbs*bs;
163979bdfe76SSatish Balay   }
164079bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
164179bdfe76SSatish Balay     Nbs = N/bs;
1642a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
164379bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
164479bdfe76SSatish Balay     n   = nbs*bs;
164579bdfe76SSatish Balay   }
1646a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
1647a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1648a8c6a408SBarry Smith   }
164979bdfe76SSatish Balay 
165079bdfe76SSatish Balay   b->m = m; B->m = m;
165179bdfe76SSatish Balay   b->n = n; B->n = n;
165279bdfe76SSatish Balay   b->N = N; B->N = N;
165379bdfe76SSatish Balay   b->M = M; B->M = M;
165479bdfe76SSatish Balay   b->bs  = bs;
165579bdfe76SSatish Balay   b->bs2 = bs*bs;
165679bdfe76SSatish Balay   b->mbs = mbs;
165779bdfe76SSatish Balay   b->nbs = nbs;
165879bdfe76SSatish Balay   b->Mbs = Mbs;
165979bdfe76SSatish Balay   b->Nbs = Nbs;
166079bdfe76SSatish Balay 
166179bdfe76SSatish Balay   /* build local table of row and column ownerships */
166279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1663f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16640ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
1665ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
166679bdfe76SSatish Balay   b->rowners[0] = 0;
166779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
166879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
166979bdfe76SSatish Balay   }
167079bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
167179bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16724fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16734fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16744fa0d573SSatish Balay 
1675ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
167679bdfe76SSatish Balay   b->cowners[0] = 0;
167779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
167879bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
167979bdfe76SSatish Balay   }
168079bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
168179bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16824fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16834fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
168479bdfe76SSatish Balay 
1685*a07cd24cSSatish Balay 
168679bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1687029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
168879bdfe76SSatish Balay   PLogObjectParent(B,b->A);
168979bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1690029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
169179bdfe76SSatish Balay   PLogObjectParent(B,b->B);
169279bdfe76SSatish Balay 
169379bdfe76SSatish Balay   /* build cache for off array entries formed */
169479bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
169590f02eecSBarry Smith   b->donotstash  = 0;
169679bdfe76SSatish Balay   b->colmap      = 0;
169779bdfe76SSatish Balay   b->garray      = 0;
169879bdfe76SSatish Balay   b->roworiented = 1;
169979bdfe76SSatish Balay 
170030793edcSSatish Balay   /* stuff used in block assembly */
170130793edcSSatish Balay   b->barray       = 0;
170230793edcSSatish Balay 
170379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
170479bdfe76SSatish Balay   b->lvec         = 0;
170579bdfe76SSatish Balay   b->Mvctx        = 0;
170679bdfe76SSatish Balay 
170779bdfe76SSatish Balay   /* stuff for MatGetRow() */
170879bdfe76SSatish Balay   b->rowindices   = 0;
170979bdfe76SSatish Balay   b->rowvalues    = 0;
171079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
171179bdfe76SSatish Balay 
1712*a07cd24cSSatish Balay   /* hash table stuff */
1713*a07cd24cSSatish Balay   b->ht          = 0;
1714*a07cd24cSSatish Balay 
171579bdfe76SSatish Balay   *A = B;
17163a40ed3dSBarry Smith   PetscFunctionReturn(0);
171779bdfe76SSatish Balay }
1718026e39d0SSatish Balay 
17195615d1e5SSatish Balay #undef __FUNC__
17205615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
17210ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
17220ac07820SSatish Balay {
17230ac07820SSatish Balay   Mat         mat;
17240ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
17250ac07820SSatish Balay   int         ierr, len=0, flg;
17260ac07820SSatish Balay 
1727d64ed03dSBarry Smith   PetscFunctionBegin;
17280ac07820SSatish Balay   *newmat       = 0;
1729d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
17300ac07820SSatish Balay   PLogObjectCreate(mat);
17310ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
17320ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
17330ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
17340ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
17350ac07820SSatish Balay   mat->factor     = matin->factor;
17360ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
17370ac07820SSatish Balay 
17380ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
17390ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
17400ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
17410ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
17420ac07820SSatish Balay 
17430ac07820SSatish Balay   a->bs  = oldmat->bs;
17440ac07820SSatish Balay   a->bs2 = oldmat->bs2;
17450ac07820SSatish Balay   a->mbs = oldmat->mbs;
17460ac07820SSatish Balay   a->nbs = oldmat->nbs;
17470ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
17480ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
17490ac07820SSatish Balay 
17500ac07820SSatish Balay   a->rstart       = oldmat->rstart;
17510ac07820SSatish Balay   a->rend         = oldmat->rend;
17520ac07820SSatish Balay   a->cstart       = oldmat->cstart;
17530ac07820SSatish Balay   a->cend         = oldmat->cend;
17540ac07820SSatish Balay   a->size         = oldmat->size;
17550ac07820SSatish Balay   a->rank         = oldmat->rank;
1756e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
17570ac07820SSatish Balay   a->rowvalues    = 0;
17580ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
175930793edcSSatish Balay   a->barray       = 0;
17600ac07820SSatish Balay 
17610ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1762f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
17630ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
17640ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
17650ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17660ac07820SSatish Balay   if (oldmat->colmap) {
17670ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
17680ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17690ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17700ac07820SSatish Balay   } else a->colmap = 0;
17710ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17720ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17730ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17740ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17750ac07820SSatish Balay   } else a->garray = 0;
17760ac07820SSatish Balay 
17770ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17780ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17790ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17800ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17810ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17820ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17830ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17840ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17850ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17860ac07820SSatish Balay   if (flg) {
17870ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17880ac07820SSatish Balay   }
17890ac07820SSatish Balay   *newmat = mat;
17903a40ed3dSBarry Smith   PetscFunctionReturn(0);
17910ac07820SSatish Balay }
179257b952d6SSatish Balay 
179357b952d6SSatish Balay #include "sys.h"
179457b952d6SSatish Balay 
17955615d1e5SSatish Balay #undef __FUNC__
17965615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
179757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
179857b952d6SSatish Balay {
179957b952d6SSatish Balay   Mat          A;
180057b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
180157b952d6SSatish Balay   Scalar       *vals,*buf;
180257b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
180357b952d6SSatish Balay   MPI_Status   status;
1804cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
180557b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
180657b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
180757b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
180857b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
180957b952d6SSatish Balay 
1810d64ed03dSBarry Smith   PetscFunctionBegin;
181157b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
181257b952d6SSatish Balay   bs2  = bs*bs;
181357b952d6SSatish Balay 
181457b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
181557b952d6SSatish Balay   if (!rank) {
181657b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1817e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1818a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1819d64ed03dSBarry Smith     if (header[3] < 0) {
1820a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
1821d64ed03dSBarry Smith     }
18226c5fab8fSBarry Smith   }
1823d64ed03dSBarry Smith 
1824ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
182557b952d6SSatish Balay   M = header[1]; N = header[2];
182657b952d6SSatish Balay 
1827a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
182857b952d6SSatish Balay 
182957b952d6SSatish Balay   /*
183057b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
183157b952d6SSatish Balay      divisible by the blocksize
183257b952d6SSatish Balay   */
183357b952d6SSatish Balay   Mbs        = M/bs;
183457b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
183557b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
183657b952d6SSatish Balay   else                  Mbs++;
183757b952d6SSatish Balay   if (extra_rows &&!rank) {
1838b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
183957b952d6SSatish Balay   }
1840537820f0SBarry Smith 
184157b952d6SSatish Balay   /* determine ownership of all rows */
184257b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
184357b952d6SSatish Balay   m   = mbs * bs;
1844cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1845cee3aa6bSSatish Balay   browners = rowners + size + 1;
1846ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
184757b952d6SSatish Balay   rowners[0] = 0;
1848cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1849cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
185057b952d6SSatish Balay   rstart = rowners[rank];
185157b952d6SSatish Balay   rend   = rowners[rank+1];
185257b952d6SSatish Balay 
185357b952d6SSatish Balay   /* distribute row lengths to all processors */
185457b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
185557b952d6SSatish Balay   if (!rank) {
185657b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
1857e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
185857b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
185957b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1860cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1861ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
186257b952d6SSatish Balay     PetscFree(sndcounts);
1863d64ed03dSBarry Smith   } else {
1864ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
186557b952d6SSatish Balay   }
186657b952d6SSatish Balay 
186757b952d6SSatish Balay   if (!rank) {
186857b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
186957b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
187057b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
187157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
187257b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
187357b952d6SSatish Balay         procsnz[i] += rowlengths[j];
187457b952d6SSatish Balay       }
187557b952d6SSatish Balay     }
187657b952d6SSatish Balay     PetscFree(rowlengths);
187757b952d6SSatish Balay 
187857b952d6SSatish Balay     /* determine max buffer needed and allocate it */
187957b952d6SSatish Balay     maxnz = 0;
188057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
188157b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
188257b952d6SSatish Balay     }
188357b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
188457b952d6SSatish Balay 
188557b952d6SSatish Balay     /* read in my part of the matrix column indices  */
188657b952d6SSatish Balay     nz = procsnz[0];
188757b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
188857b952d6SSatish Balay     mycols = ibuf;
1889cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
1890e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
1891cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1892cee3aa6bSSatish Balay 
189357b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
189457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
189557b952d6SSatish Balay       nz   = procsnz[i];
1896e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
1897ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
189857b952d6SSatish Balay     }
189957b952d6SSatish Balay     /* read in the stuff for the last proc */
190057b952d6SSatish Balay     if ( size != 1 ) {
190157b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1902e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
190357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1904ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
190557b952d6SSatish Balay     }
190657b952d6SSatish Balay     PetscFree(cols);
1907d64ed03dSBarry Smith   } else {
190857b952d6SSatish Balay     /* determine buffer space needed for message */
190957b952d6SSatish Balay     nz = 0;
191057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
191157b952d6SSatish Balay       nz += locrowlens[i];
191257b952d6SSatish Balay     }
191357b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
191457b952d6SSatish Balay     mycols = ibuf;
191557b952d6SSatish Balay     /* receive message of column indices*/
1916ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1917ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1918a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
191957b952d6SSatish Balay   }
192057b952d6SSatish Balay 
192157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1922cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1923cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
192457b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1925cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
192657b952d6SSatish Balay   masked1 = mask    + Mbs;
192757b952d6SSatish Balay   masked2 = masked1 + Mbs;
192857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
192957b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
193057b952d6SSatish Balay     dcount  = 0;
193157b952d6SSatish Balay     odcount = 0;
193257b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
193357b952d6SSatish Balay       kmax = locrowlens[rowcount];
193457b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
193557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
193657b952d6SSatish Balay         if (!mask[tmp]) {
193757b952d6SSatish Balay           mask[tmp] = 1;
193857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
193957b952d6SSatish Balay           else masked1[dcount++] = tmp;
194057b952d6SSatish Balay         }
194157b952d6SSatish Balay       }
194257b952d6SSatish Balay       rowcount++;
194357b952d6SSatish Balay     }
1944cee3aa6bSSatish Balay 
194557b952d6SSatish Balay     dlens[i]  = dcount;
194657b952d6SSatish Balay     odlens[i] = odcount;
1947cee3aa6bSSatish Balay 
194857b952d6SSatish Balay     /* zero out the mask elements we set */
194957b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
195057b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
195157b952d6SSatish Balay   }
1952cee3aa6bSSatish Balay 
195357b952d6SSatish Balay   /* create our matrix */
1954537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1955537820f0SBarry Smith          CHKERRQ(ierr);
195657b952d6SSatish Balay   A = *newmat;
19576d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
195857b952d6SSatish Balay 
195957b952d6SSatish Balay   if (!rank) {
196057b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
196157b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
196257b952d6SSatish Balay     nz = procsnz[0];
196357b952d6SSatish Balay     vals = buf;
1964cee3aa6bSSatish Balay     mycols = ibuf;
1965cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
1966e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1967cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1968537820f0SBarry Smith 
196957b952d6SSatish Balay     /* insert into matrix */
197057b952d6SSatish Balay     jj      = rstart*bs;
197157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
197257b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
197357b952d6SSatish Balay       mycols += locrowlens[i];
197457b952d6SSatish Balay       vals   += locrowlens[i];
197557b952d6SSatish Balay       jj++;
197657b952d6SSatish Balay     }
197757b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
197857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
197957b952d6SSatish Balay       nz   = procsnz[i];
198057b952d6SSatish Balay       vals = buf;
1981e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
1982ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
198357b952d6SSatish Balay     }
198457b952d6SSatish Balay     /* the last proc */
198557b952d6SSatish Balay     if ( size != 1 ){
198657b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
1987cee3aa6bSSatish Balay       vals = buf;
1988e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
198957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1990ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
199157b952d6SSatish Balay     }
199257b952d6SSatish Balay     PetscFree(procsnz);
1993d64ed03dSBarry Smith   } else {
199457b952d6SSatish Balay     /* receive numeric values */
199557b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
199657b952d6SSatish Balay 
199757b952d6SSatish Balay     /* receive message of values*/
199857b952d6SSatish Balay     vals   = buf;
1999cee3aa6bSSatish Balay     mycols = ibuf;
2000ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2001ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2002a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
200357b952d6SSatish Balay 
200457b952d6SSatish Balay     /* insert into matrix */
200557b952d6SSatish Balay     jj      = rstart*bs;
2006cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
200757b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
200857b952d6SSatish Balay       mycols += locrowlens[i];
200957b952d6SSatish Balay       vals   += locrowlens[i];
201057b952d6SSatish Balay       jj++;
201157b952d6SSatish Balay     }
201257b952d6SSatish Balay   }
201357b952d6SSatish Balay   PetscFree(locrowlens);
201457b952d6SSatish Balay   PetscFree(buf);
201557b952d6SSatish Balay   PetscFree(ibuf);
201657b952d6SSatish Balay   PetscFree(rowners);
201757b952d6SSatish Balay   PetscFree(dlens);
2018cee3aa6bSSatish Balay   PetscFree(mask);
20196d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
20206d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
20213a40ed3dSBarry Smith   PetscFunctionReturn(0);
202257b952d6SSatish Balay }
202357b952d6SSatish Balay 
202457b952d6SSatish Balay 
2025