xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 52c87ff2747de67dfdd6ccb0bd18d7d7f8d26768)
13914022bSBarry Smith 
2a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
3*52c87ff2SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.77 1997/08/07 14:40:00 bsmith Exp balay $";
479bdfe76SSatish Balay #endif
579bdfe76SSatish Balay 
63369ce9aSBarry Smith #include "pinclude/pviewer.h"
770f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
8c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
979bdfe76SSatish Balay 
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
153b2fbd54SBarry Smith 
16537820f0SBarry Smith /*
17537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1857b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1957b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2057b952d6SSatish Balay    length of colmap equals the global matrix length.
2157b952d6SSatish Balay */
225615d1e5SSatish Balay #undef __FUNC__
235615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2457b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2557b952d6SSatish Balay {
2657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2757b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
28928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2957b952d6SSatish Balay 
30758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3257b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
3457b952d6SSatish Balay   return 0;
3557b952d6SSatish Balay }
3657b952d6SSatish Balay 
3780c1aa95SSatish Balay #define CHUNKSIZE  10
3880c1aa95SSatish Balay 
39f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4080c1aa95SSatish Balay { \
4180c1aa95SSatish Balay  \
4280c1aa95SSatish Balay     brow = row/bs;  \
4380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4580c1aa95SSatish Balay       bcol = col/bs; \
4680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
47ab26458aSBarry Smith       low = 0; high = nrow; \
48ab26458aSBarry Smith       while (high-low > 3) { \
49ab26458aSBarry Smith         t = (low+high)/2; \
50ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
51ab26458aSBarry Smith         else              low  = t; \
52ab26458aSBarry Smith       } \
53ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
58eada6651SSatish Balay           else                    *bap  = value;  \
59ac7a638eSSatish Balay           goto a_noinsert; \
6080c1aa95SSatish Balay         } \
6180c1aa95SSatish Balay       } \
6289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
6311ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
6480c1aa95SSatish Balay       if (nrow >= rmax) { \
6580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6680c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6780c1aa95SSatish Balay         Scalar *new_a; \
6880c1aa95SSatish Balay  \
6911ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
7089280ab3SLois Curfman McInnes  \
7180c1aa95SSatish Balay         /* malloc new storage space */ \
7280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7680c1aa95SSatish Balay  \
7780c1aa95SSatish Balay         /* copy over old data into new slots */ \
7880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8380c1aa95SSatish Balay                                                            len*sizeof(int)); \
8480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8880c1aa95SSatish Balay         /* free up old matrix storage */ \
8980c1aa95SSatish Balay         PetscFree(a->a);  \
9080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9280c1aa95SSatish Balay         a->singlemalloc = 1; \
9380c1aa95SSatish Balay  \
9480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9880c1aa95SSatish Balay         a->reallocs++; \
9980c1aa95SSatish Balay         a->nz++; \
10080c1aa95SSatish Balay       } \
10180c1aa95SSatish Balay       N = nrow++ - 1;  \
10280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
110ac7a638eSSatish Balay       a_noinsert:; \
11180c1aa95SSatish Balay     ailen[brow] = nrow; \
11280c1aa95SSatish Balay }
11357b952d6SSatish Balay 
114ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115ac7a638eSSatish Balay { \
116ac7a638eSSatish Balay  \
117ac7a638eSSatish Balay     brow = row/bs;  \
118ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
120ac7a638eSSatish Balay       bcol = col/bs; \
121ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
122ac7a638eSSatish Balay       low = 0; high = nrow; \
123ac7a638eSSatish Balay       while (high-low > 3) { \
124ac7a638eSSatish Balay         t = (low+high)/2; \
125ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
126ac7a638eSSatish Balay         else              low  = t; \
127ac7a638eSSatish Balay       } \
128ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
129ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
130ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
131ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
133ac7a638eSSatish Balay           else                    *bap  = value;  \
134ac7a638eSSatish Balay           goto b_noinsert; \
135ac7a638eSSatish Balay         } \
136ac7a638eSSatish Balay       } \
13789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
13811ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
139ac7a638eSSatish Balay       if (nrow >= rmax) { \
140ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14174c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142ac7a638eSSatish Balay         Scalar *new_a; \
143ac7a638eSSatish Balay  \
14411ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
14589280ab3SLois Curfman McInnes  \
146ac7a638eSSatish Balay         /* malloc new storage space */ \
14774c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
150ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
151ac7a638eSSatish Balay  \
152ac7a638eSSatish Balay         /* copy over old data into new slots */ \
153ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15474c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158ac7a638eSSatish Balay                                                            len*sizeof(int)); \
159ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163ac7a638eSSatish Balay         /* free up old matrix storage */ \
16474c639caSSatish Balay         PetscFree(b->a);  \
16574c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16774c639caSSatish Balay         b->singlemalloc = 1; \
168ac7a638eSSatish Balay  \
169ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17174c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17374c639caSSatish Balay         b->reallocs++; \
17474c639caSSatish Balay         b->nz++; \
175ac7a638eSSatish Balay       } \
176ac7a638eSSatish Balay       N = nrow++ - 1;  \
177ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
178ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
179ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
180ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
184ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
185ac7a638eSSatish Balay       b_noinsert:; \
186ac7a638eSSatish Balay     bilen[brow] = nrow; \
187ac7a638eSSatish Balay }
188ac7a638eSSatish Balay 
1895615d1e5SSatish Balay #undef __FUNC__
1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   Scalar      value;
1954fa0d573SSatish Balay   int         ierr,i,j,row,col;
1964fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1974fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1984fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19957b952d6SSatish Balay 
200eada6651SSatish Balay   /* Some Variables required in the macro */
20180c1aa95SSatish Balay   Mat         A = baij->A;
20280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204ac7a638eSSatish Balay   Scalar      *aa=a->a;
205ac7a638eSSatish Balay 
206ac7a638eSSatish Balay   Mat         B = baij->B;
207ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209ac7a638eSSatish Balay   Scalar      *ba=b->a;
210ac7a638eSSatish Balay 
211ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
213ac7a638eSSatish Balay   Scalar      *ap,*bap;
21480c1aa95SSatish Balay 
21557b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
216639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
217e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
218e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
219639f9d9dSBarry Smith #endif
22057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22157b952d6SSatish Balay       row = im[i] - rstart_orig;
22257b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22457b952d6SSatish Balay           col = in[j] - cstart_orig;
22557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22857b952d6SSatish Balay         }
229639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
230e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
231e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
232639f9d9dSBarry Smith #endif
23357b952d6SSatish Balay         else {
23457b952d6SSatish Balay           if (mat->was_assembled) {
235905e6a2fSBarry Smith             if (!baij->colmap) {
236905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237905e6a2fSBarry Smith             }
238905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
23957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24157b952d6SSatish Balay               col =  in[j];
2429bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2439bf004c3SSatish Balay               B = baij->B;
2449bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2459bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2469bf004c3SSatish Balay               ba=b->a;
24757b952d6SSatish Balay             }
24857b952d6SSatish Balay           }
24957b952d6SSatish Balay           else col = in[j];
25057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
25557b952d6SSatish Balay     }
25657b952d6SSatish Balay     else {
25790f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25857b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
25957b952d6SSatish Balay       }
26057b952d6SSatish Balay       else {
26190f02eecSBarry Smith         if (!baij->donotstash) {
26257b952d6SSatish Balay           row = im[i];
26357b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26457b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26557b952d6SSatish Balay           }
26657b952d6SSatish Balay         }
26757b952d6SSatish Balay       }
26857b952d6SSatish Balay     }
26990f02eecSBarry Smith   }
27057b952d6SSatish Balay   return 0;
27157b952d6SSatish Balay }
27257b952d6SSatish Balay 
273ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
274ab26458aSBarry Smith #undef __FUNC__
275ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
276ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
277ab26458aSBarry Smith {
278ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27930793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
280abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
281ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
282ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
283ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
284ab26458aSBarry Smith 
28530793edcSSatish Balay 
28630793edcSSatish Balay   if(!barray) {
28747513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28830793edcSSatish Balay   }
28930793edcSSatish Balay 
290ab26458aSBarry Smith   if (roworiented) {
291ab26458aSBarry Smith     stepval = (n-1)*bs;
292ab26458aSBarry Smith   } else {
293ab26458aSBarry Smith     stepval = (m-1)*bs;
294ab26458aSBarry Smith   }
295ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
296ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
297ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
298ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
299ab26458aSBarry Smith #endif
300ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
301ab26458aSBarry Smith       row = im[i] - rstart;
302ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30315b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30415b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30515b57d14SSatish Balay           barray = v + i*bs2;
30615b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30715b57d14SSatish Balay           barray = v + j*bs2;
30815b57d14SSatish Balay         } else { /* Here a copy is required */
309ab26458aSBarry Smith           if (roworiented) {
310ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
311ab26458aSBarry Smith           } else {
312ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
313abef11f7SSatish Balay           }
31447513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31547513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31630793edcSSatish Balay               *barray++  = *value++;
31747513183SBarry Smith             }
31847513183SBarry Smith           }
31930793edcSSatish Balay           barray -=bs2;
32015b57d14SSatish Balay         }
321abef11f7SSatish Balay 
322abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
323abef11f7SSatish Balay           col  = in[j] - cstart;
32430793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
325ab26458aSBarry Smith         }
326ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
327ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
32847513183SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");}
329ab26458aSBarry Smith #endif
330ab26458aSBarry Smith         else {
331ab26458aSBarry Smith           if (mat->was_assembled) {
332ab26458aSBarry Smith             if (!baij->colmap) {
333ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
334ab26458aSBarry Smith             }
335a5eb4965SSatish Balay 
336a5eb4965SSatish Balay #if defined(PETSC_BOPT_g)
337a5eb4965SSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");}
338a5eb4965SSatish Balay #endif
339a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
340ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
341ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
342ab26458aSBarry Smith               col =  in[j];
343ab26458aSBarry Smith             }
344ab26458aSBarry Smith           }
345ab26458aSBarry Smith           else col = in[j];
34630793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
347ab26458aSBarry Smith         }
348ab26458aSBarry Smith       }
349ab26458aSBarry Smith     }
350ab26458aSBarry Smith     else {
351ab26458aSBarry Smith       if (!baij->donotstash) {
352ab26458aSBarry Smith         if (roworiented ) {
353abef11f7SSatish Balay           row   = im[i]*bs;
354abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
355abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
356abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
357abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
358abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
359abef11f7SSatish Balay               }
360ab26458aSBarry Smith             }
361ab26458aSBarry Smith           }
362ab26458aSBarry Smith         }
363ab26458aSBarry Smith         else {
364ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
365abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
366abef11f7SSatish Balay             col   = in[j]*bs;
367abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
368abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
369abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
370ab26458aSBarry Smith               }
371ab26458aSBarry Smith             }
372ab26458aSBarry Smith           }
373abef11f7SSatish Balay         }
374abef11f7SSatish Balay       }
375ab26458aSBarry Smith     }
376ab26458aSBarry Smith   }
377ab26458aSBarry Smith   return 0;
378ab26458aSBarry Smith }
379ab26458aSBarry Smith 
3805615d1e5SSatish Balay #undef __FUNC__
3815615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
382ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
383d6de1c52SSatish Balay {
384d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
385d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
386d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
387d6de1c52SSatish Balay 
388d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
389e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
390e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
391d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
392d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
393d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
394e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
395e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
396d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
397d6de1c52SSatish Balay           col = idxn[j] - bscstart;
398d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
399d6de1c52SSatish Balay         }
400d6de1c52SSatish Balay         else {
401905e6a2fSBarry Smith           if (!baij->colmap) {
402905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
403905e6a2fSBarry Smith           }
404e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
405dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
406d9d09a02SSatish Balay           else {
407dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
408d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
409d6de1c52SSatish Balay           }
410d6de1c52SSatish Balay         }
411d6de1c52SSatish Balay       }
412d9d09a02SSatish Balay     }
413d6de1c52SSatish Balay     else {
414e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
415d6de1c52SSatish Balay     }
416d6de1c52SSatish Balay   }
417d6de1c52SSatish Balay   return 0;
418d6de1c52SSatish Balay }
419d6de1c52SSatish Balay 
4205615d1e5SSatish Balay #undef __FUNC__
4215615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
422ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
423d6de1c52SSatish Balay {
424d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
425d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
426acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
427d6de1c52SSatish Balay   double     sum = 0.0;
428d6de1c52SSatish Balay   Scalar     *v;
429d6de1c52SSatish Balay 
430d6de1c52SSatish Balay   if (baij->size == 1) {
431d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
432d6de1c52SSatish Balay   } else {
433d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
434d6de1c52SSatish Balay       v = amat->a;
435d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
436d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
437d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
438d6de1c52SSatish Balay #else
439d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
440d6de1c52SSatish Balay #endif
441d6de1c52SSatish Balay       }
442d6de1c52SSatish Balay       v = bmat->a;
443d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
444d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
445d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
446d6de1c52SSatish Balay #else
447d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
448d6de1c52SSatish Balay #endif
449d6de1c52SSatish Balay       }
450d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
451d6de1c52SSatish Balay       *norm = sqrt(*norm);
452d6de1c52SSatish Balay     }
453acdf5bf4SSatish Balay     else
454e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
455d6de1c52SSatish Balay   }
456d6de1c52SSatish Balay   return 0;
457d6de1c52SSatish Balay }
45857b952d6SSatish Balay 
4595615d1e5SSatish Balay #undef __FUNC__
4605615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
461ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
46257b952d6SSatish Balay {
46357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
46457b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
46557b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
46657b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
46757b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
46857b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
46957b952d6SSatish Balay   InsertMode  addv;
47057b952d6SSatish Balay   Scalar      *rvalues,*svalues;
47157b952d6SSatish Balay 
47257b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
473e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
47457b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
475e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
47657b952d6SSatish Balay   }
477e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
47857b952d6SSatish Balay 
47957b952d6SSatish Balay   /*  first count number of contributors to each processor */
48057b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
48157b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
48257b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
48357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
48457b952d6SSatish Balay     idx = baij->stash.idx[i];
48557b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
48657b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
48757b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
48857b952d6SSatish Balay       }
48957b952d6SSatish Balay     }
49057b952d6SSatish Balay   }
49157b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
49257b952d6SSatish Balay 
49357b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
49457b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
49557b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
49657b952d6SSatish Balay   nreceives = work[rank];
49757b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
49857b952d6SSatish Balay   nmax = work[rank];
49957b952d6SSatish Balay   PetscFree(work);
50057b952d6SSatish Balay 
50157b952d6SSatish Balay   /* post receives:
50257b952d6SSatish Balay        1) each message will consist of ordered pairs
50357b952d6SSatish Balay      (global index,value) we store the global index as a double
50457b952d6SSatish Balay      to simplify the message passing.
50557b952d6SSatish Balay        2) since we don't know how long each individual message is we
50657b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
50757b952d6SSatish Balay      this is a lot of wasted space.
50857b952d6SSatish Balay 
50957b952d6SSatish Balay 
51057b952d6SSatish Balay        This could be done better.
51157b952d6SSatish Balay   */
51257b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
51357b952d6SSatish Balay   CHKPTRQ(rvalues);
51457b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
51557b952d6SSatish Balay   CHKPTRQ(recv_waits);
51657b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
51757b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
51857b952d6SSatish Balay               comm,recv_waits+i);
51957b952d6SSatish Balay   }
52057b952d6SSatish Balay 
52157b952d6SSatish Balay   /* do sends:
52257b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
52357b952d6SSatish Balay          the ith processor
52457b952d6SSatish Balay   */
52557b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
52657b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
52757b952d6SSatish Balay   CHKPTRQ(send_waits);
52857b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
52957b952d6SSatish Balay   starts[0] = 0;
53057b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
53157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
53257b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
53357b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
53457b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
53557b952d6SSatish Balay   }
53657b952d6SSatish Balay   PetscFree(owner);
53757b952d6SSatish Balay   starts[0] = 0;
53857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
53957b952d6SSatish Balay   count = 0;
54057b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
54157b952d6SSatish Balay     if (procs[i]) {
54257b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
54357b952d6SSatish Balay                 comm,send_waits+count++);
54457b952d6SSatish Balay     }
54557b952d6SSatish Balay   }
54657b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
54757b952d6SSatish Balay 
54857b952d6SSatish Balay   /* Free cache space */
549d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
55057b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
55157b952d6SSatish Balay 
55257b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
55357b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
55457b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
55557b952d6SSatish Balay   baij->rmax       = nmax;
55657b952d6SSatish Balay 
55757b952d6SSatish Balay   return 0;
55857b952d6SSatish Balay }
559596b8d2eSBarry Smith #include <math.h>
560596b8d2eSBarry Smith #define HASH_KEY 0.6180339887
561596b8d2eSBarry Smith #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))+1)
56257b952d6SSatish Balay 
563596b8d2eSBarry Smith int CreateHashTable(Mat mat)
564596b8d2eSBarry Smith {
565596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
566596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
567596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
568596b8d2eSBarry Smith   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
569*52c87ff2SSatish Balay   int         size=(int)(1.5*nz),ct=0,max=0;
570*52c87ff2SSatish Balay   /* Scalar      *aa=a->a,*ba=b->a; */
571596b8d2eSBarry Smith   double      key;
572596b8d2eSBarry Smith   static double *HT;
573596b8d2eSBarry Smith   static      int flag=1;
574596b8d2eSBarry Smith 
575596b8d2eSBarry Smith 
576596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
577596b8d2eSBarry Smith   if (flag) {
578596b8d2eSBarry Smith     HT = (double*)PetscMalloc(size*sizeof(double));
579596b8d2eSBarry Smith     flag = 0;
580596b8d2eSBarry Smith   }
581596b8d2eSBarry Smith   PetscMemzero(HT,size*sizeof(double));
582596b8d2eSBarry Smith 
583596b8d2eSBarry Smith   /* Loop Over A */
584596b8d2eSBarry Smith   for ( i=0; i<a->n; i++ ) {
585596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
586596b8d2eSBarry Smith       key = i*baij->n+aj[j]+1;
587596b8d2eSBarry Smith       h1  = HASH1(size,key);
588596b8d2eSBarry Smith 
589596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
590596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
591596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
592596b8d2eSBarry Smith           break;
593596b8d2eSBarry Smith         } else ct++;
594596b8d2eSBarry Smith       }
595596b8d2eSBarry Smith       if (k> max) max =k;
596596b8d2eSBarry Smith     }
597596b8d2eSBarry Smith   }
598596b8d2eSBarry Smith    printf("***max1 = %d\n",max);
599596b8d2eSBarry Smith   /* Loop Over B */
600596b8d2eSBarry Smith   for ( i=0; i<b->n; i++ ) {
601596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
602596b8d2eSBarry Smith       key = i*b->n+bj[j]+1;
603596b8d2eSBarry Smith       h1  = HASH1(size,key);
604596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
605596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
606596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
607596b8d2eSBarry Smith           break;
608596b8d2eSBarry Smith         } else ct++;
609596b8d2eSBarry Smith       }
610596b8d2eSBarry Smith       if (k> max) max =k;
611596b8d2eSBarry Smith     }
612596b8d2eSBarry Smith   }
613596b8d2eSBarry Smith 
614596b8d2eSBarry Smith   printf("***max2 = %d\n",max);
615596b8d2eSBarry Smith   /* Print Summary */
616596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
617596b8d2eSBarry Smith     if (HT[i]) {j++;}
618596b8d2eSBarry Smith 
619596b8d2eSBarry Smith   printf("Size %d Average Buckets %d no of Keys %d\n",size,ct,j);
620596b8d2eSBarry Smith   return 0;
621596b8d2eSBarry Smith }
62257b952d6SSatish Balay 
6235615d1e5SSatish Balay #undef __FUNC__
6245615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
625ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
62657b952d6SSatish Balay {
62757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
62857b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
62957b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
630596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
63157b952d6SSatish Balay   Scalar      *values,val;
632e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
63357b952d6SSatish Balay 
63457b952d6SSatish Balay   /*  wait on receives */
63557b952d6SSatish Balay   while (count) {
63657b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
63757b952d6SSatish Balay     /* unpack receives into our local space */
63857b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
63957b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
64057b952d6SSatish Balay     n = n/3;
64157b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
64257b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
64357b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
64457b952d6SSatish Balay       val = values[3*i+2];
64557b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
64657b952d6SSatish Balay         col -= baij->cstart*bs;
6476fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
64857b952d6SSatish Balay       }
64957b952d6SSatish Balay       else {
65057b952d6SSatish Balay         if (mat->was_assembled) {
651905e6a2fSBarry Smith           if (!baij->colmap) {
652905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
653905e6a2fSBarry Smith           }
654a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
65557b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
65657b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65757b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
65857b952d6SSatish Balay           }
65957b952d6SSatish Balay         }
6606fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
66157b952d6SSatish Balay       }
66257b952d6SSatish Balay     }
66357b952d6SSatish Balay     count--;
66457b952d6SSatish Balay   }
66557b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
66657b952d6SSatish Balay 
66757b952d6SSatish Balay   /* wait on sends */
66857b952d6SSatish Balay   if (baij->nsends) {
66957b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
67057b952d6SSatish Balay     CHKPTRQ(send_status);
67157b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
67257b952d6SSatish Balay     PetscFree(send_status);
67357b952d6SSatish Balay   }
67457b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
67557b952d6SSatish Balay 
67657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
67757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
67857b952d6SSatish Balay 
67957b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
68057b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
68157b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
68257b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
68357b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
68457b952d6SSatish Balay   }
68557b952d6SSatish Balay 
6866d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
68757b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
68857b952d6SSatish Balay   }
68957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
69057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
69157b952d6SSatish Balay 
692596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
693596b8d2eSBarry Smith   if (flg) CreateHashTable(mat);
69457b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
69557b952d6SSatish Balay   return 0;
69657b952d6SSatish Balay }
69757b952d6SSatish Balay 
6985615d1e5SSatish Balay #undef __FUNC__
6995615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
70057b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
70157b952d6SSatish Balay {
70257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
70357b952d6SSatish Balay   int          ierr;
70457b952d6SSatish Balay 
70557b952d6SSatish Balay   if (baij->size == 1) {
70657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
70757b952d6SSatish Balay   }
708e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
70957b952d6SSatish Balay   return 0;
71057b952d6SSatish Balay }
71157b952d6SSatish Balay 
7125615d1e5SSatish Balay #undef __FUNC__
7135615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
71457b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
71557b952d6SSatish Balay {
71657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
717cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
71857b952d6SSatish Balay   FILE         *fd;
71957b952d6SSatish Balay   ViewerType   vtype;
72057b952d6SSatish Balay 
72157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
72257b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
72357b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
724639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7254e220ebcSLois Curfman McInnes       MatInfo info;
72657b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
72757b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7284e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
72957b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
73057b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7314e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7324e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7334e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7344e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7354e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7364e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
73757b952d6SSatish Balay       fflush(fd);
73857b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
73957b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
74057b952d6SSatish Balay       return 0;
74157b952d6SSatish Balay     }
742639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
743bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
74457b952d6SSatish Balay       return 0;
74557b952d6SSatish Balay     }
74657b952d6SSatish Balay   }
74757b952d6SSatish Balay 
74857b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
74957b952d6SSatish Balay     Draw       draw;
75057b952d6SSatish Balay     PetscTruth isnull;
75157b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
75257b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
75357b952d6SSatish Balay   }
75457b952d6SSatish Balay 
75557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
75657b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
75757b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
75857b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
75957b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
76057b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
76157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
76257b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
76357b952d6SSatish Balay     fflush(fd);
76457b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
76557b952d6SSatish Balay   }
76657b952d6SSatish Balay   else {
76757b952d6SSatish Balay     int size = baij->size;
76857b952d6SSatish Balay     rank = baij->rank;
76957b952d6SSatish Balay     if (size == 1) {
77057b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
77157b952d6SSatish Balay     }
77257b952d6SSatish Balay     else {
77357b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
77457b952d6SSatish Balay       Mat         A;
77557b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
77657b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
77757b952d6SSatish Balay       int         mbs=baij->mbs;
77857b952d6SSatish Balay       Scalar      *a;
77957b952d6SSatish Balay 
78057b952d6SSatish Balay       if (!rank) {
781cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
78257b952d6SSatish Balay         CHKERRQ(ierr);
78357b952d6SSatish Balay       }
78457b952d6SSatish Balay       else {
785cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
78657b952d6SSatish Balay         CHKERRQ(ierr);
78757b952d6SSatish Balay       }
78857b952d6SSatish Balay       PLogObjectParent(mat,A);
78957b952d6SSatish Balay 
79057b952d6SSatish Balay       /* copy over the A part */
79157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
79257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
79357b952d6SSatish Balay       row = baij->rstart;
79457b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
79557b952d6SSatish Balay 
79657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
79757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
79857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
79957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
80057b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
80157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
802cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
803cee3aa6bSSatish Balay             col++; a += bs;
80457b952d6SSatish Balay           }
80557b952d6SSatish Balay         }
80657b952d6SSatish Balay       }
80757b952d6SSatish Balay       /* copy over the B part */
80857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
80957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
81057b952d6SSatish Balay       row = baij->rstart*bs;
81157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
81257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
81357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
81457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
81557b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
81657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
817cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
818cee3aa6bSSatish Balay             col++; a += bs;
81957b952d6SSatish Balay           }
82057b952d6SSatish Balay         }
82157b952d6SSatish Balay       }
82257b952d6SSatish Balay       PetscFree(rvals);
8236d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8246d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
82557b952d6SSatish Balay       if (!rank) {
82657b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
82757b952d6SSatish Balay       }
82857b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
82957b952d6SSatish Balay     }
83057b952d6SSatish Balay   }
83157b952d6SSatish Balay   return 0;
83257b952d6SSatish Balay }
83357b952d6SSatish Balay 
83457b952d6SSatish Balay 
83557b952d6SSatish Balay 
8365615d1e5SSatish Balay #undef __FUNC__
8375615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
838ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
83957b952d6SSatish Balay {
84057b952d6SSatish Balay   Mat         mat = (Mat) obj;
84157b952d6SSatish Balay   int         ierr;
84257b952d6SSatish Balay   ViewerType  vtype;
84357b952d6SSatish Balay 
84457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
84557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
84657b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
84757b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
84857b952d6SSatish Balay   }
84957b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
85057b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
85157b952d6SSatish Balay   }
85257b952d6SSatish Balay   return 0;
85357b952d6SSatish Balay }
85457b952d6SSatish Balay 
8555615d1e5SSatish Balay #undef __FUNC__
8565615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
857ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
85879bdfe76SSatish Balay {
85979bdfe76SSatish Balay   Mat         mat = (Mat) obj;
86079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
86179bdfe76SSatish Balay   int         ierr;
86279bdfe76SSatish Balay 
86379bdfe76SSatish Balay #if defined(PETSC_LOG)
86479bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
86579bdfe76SSatish Balay #endif
86679bdfe76SSatish Balay 
86783e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
86879bdfe76SSatish Balay   PetscFree(baij->rowners);
86979bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
87079bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
87179bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
87279bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
87379bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
87479bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
87579bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
87630793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
87779bdfe76SSatish Balay   PetscFree(baij);
87890f02eecSBarry Smith   if (mat->mapping) {
87990f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
88090f02eecSBarry Smith   }
88179bdfe76SSatish Balay   PLogObjectDestroy(mat);
88279bdfe76SSatish Balay   PetscHeaderDestroy(mat);
88379bdfe76SSatish Balay   return 0;
88479bdfe76SSatish Balay }
88579bdfe76SSatish Balay 
8865615d1e5SSatish Balay #undef __FUNC__
8875615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
888ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
889cee3aa6bSSatish Balay {
890cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
89147b4a8eaSLois Curfman McInnes   int         ierr, nt;
892cee3aa6bSSatish Balay 
893c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
89447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
895ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
89647b4a8eaSLois Curfman McInnes   }
897c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
89847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
899e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
90047b4a8eaSLois Curfman McInnes   }
90143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
902cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
90343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
904cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
90543a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
906cee3aa6bSSatish Balay   return 0;
907cee3aa6bSSatish Balay }
908cee3aa6bSSatish Balay 
9095615d1e5SSatish Balay #undef __FUNC__
9105615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
911ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
912cee3aa6bSSatish Balay {
913cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
914cee3aa6bSSatish Balay   int        ierr;
91543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
91743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
918cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
919cee3aa6bSSatish Balay   return 0;
920cee3aa6bSSatish Balay }
921cee3aa6bSSatish Balay 
9225615d1e5SSatish Balay #undef __FUNC__
9235615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
924ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
925cee3aa6bSSatish Balay {
926cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
927cee3aa6bSSatish Balay   int        ierr;
928cee3aa6bSSatish Balay 
929cee3aa6bSSatish Balay   /* do nondiagonal part */
930cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
931cee3aa6bSSatish Balay   /* send it on its way */
932537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
933cee3aa6bSSatish Balay   /* do local part */
934cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
935cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
936cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
937cee3aa6bSSatish Balay   /* but is not perhaps always true. */
938639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
939cee3aa6bSSatish Balay   return 0;
940cee3aa6bSSatish Balay }
941cee3aa6bSSatish Balay 
9425615d1e5SSatish Balay #undef __FUNC__
9435615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
944ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
945cee3aa6bSSatish Balay {
946cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
947cee3aa6bSSatish Balay   int        ierr;
948cee3aa6bSSatish Balay 
949cee3aa6bSSatish Balay   /* do nondiagonal part */
950cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
951cee3aa6bSSatish Balay   /* send it on its way */
952537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
953cee3aa6bSSatish Balay   /* do local part */
954cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
955cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
956cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
957cee3aa6bSSatish Balay   /* but is not perhaps always true. */
958537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
959cee3aa6bSSatish Balay   return 0;
960cee3aa6bSSatish Balay }
961cee3aa6bSSatish Balay 
962cee3aa6bSSatish Balay /*
963cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
964cee3aa6bSSatish Balay    diagonal block
965cee3aa6bSSatish Balay */
9665615d1e5SSatish Balay #undef __FUNC__
9675615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
968ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
969cee3aa6bSSatish Balay {
970cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
971cee3aa6bSSatish Balay   if (a->M != a->N)
972e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
973cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
974cee3aa6bSSatish Balay }
975cee3aa6bSSatish Balay 
9765615d1e5SSatish Balay #undef __FUNC__
9775615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
978ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
979cee3aa6bSSatish Balay {
980cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
981cee3aa6bSSatish Balay   int        ierr;
982cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
983cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
984cee3aa6bSSatish Balay   return 0;
985cee3aa6bSSatish Balay }
986026e39d0SSatish Balay 
9875615d1e5SSatish Balay #undef __FUNC__
9885615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
989ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
99057b952d6SSatish Balay {
99157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
99257b952d6SSatish Balay   *m = mat->M; *n = mat->N;
99357b952d6SSatish Balay   return 0;
99457b952d6SSatish Balay }
99557b952d6SSatish Balay 
9965615d1e5SSatish Balay #undef __FUNC__
9975615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
998ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
99957b952d6SSatish Balay {
100057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
100157b952d6SSatish Balay   *m = mat->m; *n = mat->N;
100257b952d6SSatish Balay   return 0;
100357b952d6SSatish Balay }
100457b952d6SSatish Balay 
10055615d1e5SSatish Balay #undef __FUNC__
10065615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1007ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
100857b952d6SSatish Balay {
100957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
101057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
101157b952d6SSatish Balay   return 0;
101257b952d6SSatish Balay }
101357b952d6SSatish Balay 
1014acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1015acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1016acdf5bf4SSatish Balay 
10175615d1e5SSatish Balay #undef __FUNC__
10185615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1019acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1020acdf5bf4SSatish Balay {
1021acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1022acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1023acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1024d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1025d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1026acdf5bf4SSatish Balay 
1027e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1028acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1029acdf5bf4SSatish Balay 
1030acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1031acdf5bf4SSatish Balay     /*
1032acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1033acdf5bf4SSatish Balay     */
1034acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1035bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1036bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1037acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1038acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1039acdf5bf4SSatish Balay     }
1040acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1041acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1042acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1043acdf5bf4SSatish Balay   }
1044acdf5bf4SSatish Balay 
1045acdf5bf4SSatish Balay 
1046e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1047d9d09a02SSatish Balay   lrow = row - brstart;
1048acdf5bf4SSatish Balay 
1049acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1050acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1051acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1052acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1053acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1054acdf5bf4SSatish Balay   nztot = nzA + nzB;
1055acdf5bf4SSatish Balay 
1056acdf5bf4SSatish Balay   cmap  = mat->garray;
1057acdf5bf4SSatish Balay   if (v  || idx) {
1058acdf5bf4SSatish Balay     if (nztot) {
1059acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1060acdf5bf4SSatish Balay       int imark = -1;
1061acdf5bf4SSatish Balay       if (v) {
1062acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1063acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1064d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1065acdf5bf4SSatish Balay           else break;
1066acdf5bf4SSatish Balay         }
1067acdf5bf4SSatish Balay         imark = i;
1068acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1069acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1070acdf5bf4SSatish Balay       }
1071acdf5bf4SSatish Balay       if (idx) {
1072acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1073acdf5bf4SSatish Balay         if (imark > -1) {
1074acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1075bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1076acdf5bf4SSatish Balay           }
1077acdf5bf4SSatish Balay         } else {
1078acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1079d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1080d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1081acdf5bf4SSatish Balay             else break;
1082acdf5bf4SSatish Balay           }
1083acdf5bf4SSatish Balay           imark = i;
1084acdf5bf4SSatish Balay         }
1085d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1086d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1087acdf5bf4SSatish Balay       }
1088acdf5bf4SSatish Balay     }
1089d212a18eSSatish Balay     else {
1090d212a18eSSatish Balay       if (idx) *idx = 0;
1091d212a18eSSatish Balay       if (v)   *v   = 0;
1092d212a18eSSatish Balay     }
1093acdf5bf4SSatish Balay   }
1094acdf5bf4SSatish Balay   *nz = nztot;
1095acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1096acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1097acdf5bf4SSatish Balay   return 0;
1098acdf5bf4SSatish Balay }
1099acdf5bf4SSatish Balay 
11005615d1e5SSatish Balay #undef __FUNC__
11015615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1102acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1103acdf5bf4SSatish Balay {
1104acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1105acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1106e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1107acdf5bf4SSatish Balay   }
1108acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1109acdf5bf4SSatish Balay   return 0;
1110acdf5bf4SSatish Balay }
1111acdf5bf4SSatish Balay 
11125615d1e5SSatish Balay #undef __FUNC__
11135615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1114ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
11155a838052SSatish Balay {
11165a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
11175a838052SSatish Balay   *bs = baij->bs;
11185a838052SSatish Balay   return 0;
11195a838052SSatish Balay }
11205a838052SSatish Balay 
11215615d1e5SSatish Balay #undef __FUNC__
11225615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1123ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
112458667388SSatish Balay {
112558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
112658667388SSatish Balay   int         ierr;
112758667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
112858667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
112958667388SSatish Balay   return 0;
113058667388SSatish Balay }
11310ac07820SSatish Balay 
11325615d1e5SSatish Balay #undef __FUNC__
11335615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1134ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11350ac07820SSatish Balay {
11364e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11374e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11387d57db60SLois Curfman McInnes   int         ierr;
11397d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11400ac07820SSatish Balay 
11414e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11424e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11434e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11444e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11454e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11464e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11474e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11484e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11494e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11500ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11514e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11524e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11534e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11544e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11554e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11560ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1157dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11584e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11594e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11604e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11614e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11624e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11630ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1164dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11654e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11664e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11674e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11684e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11694e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11700ac07820SSatish Balay   }
11714e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11724e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11734e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11740ac07820SSatish Balay   return 0;
11750ac07820SSatish Balay }
11760ac07820SSatish Balay 
11775615d1e5SSatish Balay #undef __FUNC__
11785615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1179ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
118058667388SSatish Balay {
118158667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
118258667388SSatish Balay 
118358667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
118458667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11856da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1186c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
118796854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1188c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1189b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1190b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1191b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1192aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
119358667388SSatish Balay         MatSetOption(a->A,op);
119458667388SSatish Balay         MatSetOption(a->B,op);
1195b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11966da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
119758667388SSatish Balay              op == MAT_SYMMETRIC ||
119858667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
119958667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
120058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
120158667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
120258667388SSatish Balay     a->roworiented = 0;
120358667388SSatish Balay     MatSetOption(a->A,op);
120458667388SSatish Balay     MatSetOption(a->B,op);
12052b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
120690f02eecSBarry Smith     a->donotstash = 1;
120790f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1208e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
120958667388SSatish Balay   else
1210e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
121158667388SSatish Balay   return 0;
121258667388SSatish Balay }
121358667388SSatish Balay 
12145615d1e5SSatish Balay #undef __FUNC__
12155615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1216ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
12170ac07820SSatish Balay {
12180ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
12190ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
12200ac07820SSatish Balay   Mat        B;
12210ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
12220ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
12230ac07820SSatish Balay   Scalar     *a;
12240ac07820SSatish Balay 
12250ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1226e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
12270ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
12280ac07820SSatish Balay   CHKERRQ(ierr);
12290ac07820SSatish Balay 
12300ac07820SSatish Balay   /* copy over the A part */
12310ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12320ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12330ac07820SSatish Balay   row = baij->rstart;
12340ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12350ac07820SSatish Balay 
12360ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12370ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12380ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12390ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12400ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12410ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12420ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12430ac07820SSatish Balay         col++; a += bs;
12440ac07820SSatish Balay       }
12450ac07820SSatish Balay     }
12460ac07820SSatish Balay   }
12470ac07820SSatish Balay   /* copy over the B part */
12480ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12490ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12500ac07820SSatish Balay   row = baij->rstart*bs;
12510ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12520ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12530ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12540ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12550ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12560ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12570ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12580ac07820SSatish Balay         col++; a += bs;
12590ac07820SSatish Balay       }
12600ac07820SSatish Balay     }
12610ac07820SSatish Balay   }
12620ac07820SSatish Balay   PetscFree(rvals);
12630ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12640ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12650ac07820SSatish Balay 
12660ac07820SSatish Balay   if (matout != PETSC_NULL) {
12670ac07820SSatish Balay     *matout = B;
12680ac07820SSatish Balay   } else {
12690ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12700ac07820SSatish Balay     PetscFree(baij->rowners);
12710ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12720ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12730ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12740ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12750ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12760ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12770ac07820SSatish Balay     PetscFree(baij);
1278f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12790ac07820SSatish Balay     PetscHeaderDestroy(B);
12800ac07820SSatish Balay   }
12810ac07820SSatish Balay   return 0;
12820ac07820SSatish Balay }
12830e95ebc0SSatish Balay 
12845615d1e5SSatish Balay #undef __FUNC__
12855615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12860e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12870e95ebc0SSatish Balay {
12880e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12890e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12900e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12910e95ebc0SSatish Balay 
12920e95ebc0SSatish Balay   if (ll)  {
12930e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12940e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1295e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12960e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12970e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12980e95ebc0SSatish Balay   }
1299e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
13000e95ebc0SSatish Balay   return 0;
13010e95ebc0SSatish Balay }
13020e95ebc0SSatish Balay 
13030ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
13040ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
13050ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
13060ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
13070ac07820SSatish Balay    routine.
13080ac07820SSatish Balay */
13095615d1e5SSatish Balay #undef __FUNC__
13105615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1311ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
13120ac07820SSatish Balay {
13130ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
13140ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
13150ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
13160ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
13170ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
13180ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
13190ac07820SSatish Balay   MPI_Comm       comm = A->comm;
13200ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
13210ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
13220ac07820SSatish Balay   IS             istmp;
13230ac07820SSatish Balay 
13240ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
13250ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
13260ac07820SSatish Balay 
13270ac07820SSatish Balay   /*  first count number of contributors to each processor */
13280ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
13290ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
13300ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
13310ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13320ac07820SSatish Balay     idx = rows[i];
13330ac07820SSatish Balay     found = 0;
13340ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13350ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13360ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13370ac07820SSatish Balay       }
13380ac07820SSatish Balay     }
1339e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13400ac07820SSatish Balay   }
13410ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13420ac07820SSatish Balay 
13430ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13440ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13450ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13460ac07820SSatish Balay   nrecvs = work[rank];
13470ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13480ac07820SSatish Balay   nmax = work[rank];
13490ac07820SSatish Balay   PetscFree(work);
13500ac07820SSatish Balay 
13510ac07820SSatish Balay   /* post receives:   */
13520ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13530ac07820SSatish Balay   CHKPTRQ(rvalues);
13540ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13550ac07820SSatish Balay   CHKPTRQ(recv_waits);
13560ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13570ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13580ac07820SSatish Balay   }
13590ac07820SSatish Balay 
13600ac07820SSatish Balay   /* do sends:
13610ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13620ac07820SSatish Balay          the ith processor
13630ac07820SSatish Balay   */
13640ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13650ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13660ac07820SSatish Balay   CHKPTRQ(send_waits);
13670ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13680ac07820SSatish Balay   starts[0] = 0;
13690ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13700ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13710ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13720ac07820SSatish Balay   }
13730ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13740ac07820SSatish Balay 
13750ac07820SSatish Balay   starts[0] = 0;
13760ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13770ac07820SSatish Balay   count = 0;
13780ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13790ac07820SSatish Balay     if (procs[i]) {
13800ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13810ac07820SSatish Balay     }
13820ac07820SSatish Balay   }
13830ac07820SSatish Balay   PetscFree(starts);
13840ac07820SSatish Balay 
13850ac07820SSatish Balay   base = owners[rank]*bs;
13860ac07820SSatish Balay 
13870ac07820SSatish Balay   /*  wait on receives */
13880ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13890ac07820SSatish Balay   source = lens + nrecvs;
13900ac07820SSatish Balay   count  = nrecvs; slen = 0;
13910ac07820SSatish Balay   while (count) {
13920ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13930ac07820SSatish Balay     /* unpack receives into our local space */
13940ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13950ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13960ac07820SSatish Balay     lens[imdex]  = n;
13970ac07820SSatish Balay     slen += n;
13980ac07820SSatish Balay     count--;
13990ac07820SSatish Balay   }
14000ac07820SSatish Balay   PetscFree(recv_waits);
14010ac07820SSatish Balay 
14020ac07820SSatish Balay   /* move the data into the send scatter */
14030ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
14040ac07820SSatish Balay   count = 0;
14050ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
14060ac07820SSatish Balay     values = rvalues + i*nmax;
14070ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
14080ac07820SSatish Balay       lrows[count++] = values[j] - base;
14090ac07820SSatish Balay     }
14100ac07820SSatish Balay   }
14110ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
14120ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
14130ac07820SSatish Balay 
14140ac07820SSatish Balay   /* actually zap the local rows */
1415029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
14160ac07820SSatish Balay   PLogObjectParent(A,istmp);
14170ac07820SSatish Balay   PetscFree(lrows);
14180ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
14190ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
14200ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
14210ac07820SSatish Balay 
14220ac07820SSatish Balay   /* wait on sends */
14230ac07820SSatish Balay   if (nsends) {
14240ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
14250ac07820SSatish Balay     CHKPTRQ(send_status);
14260ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
14270ac07820SSatish Balay     PetscFree(send_status);
14280ac07820SSatish Balay   }
14290ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
14300ac07820SSatish Balay 
14310ac07820SSatish Balay   return 0;
14320ac07820SSatish Balay }
1433ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14345615d1e5SSatish Balay #undef __FUNC__
14355615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1436ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1437ba4ca20aSSatish Balay {
1438ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1439ba4ca20aSSatish Balay 
1440ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1441ba4ca20aSSatish Balay   else return 0;
1442ba4ca20aSSatish Balay }
14430ac07820SSatish Balay 
14445615d1e5SSatish Balay #undef __FUNC__
14455615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1446ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1447bb5a7306SBarry Smith {
1448bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1449bb5a7306SBarry Smith   int         ierr;
1450bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1451bb5a7306SBarry Smith   return 0;
1452bb5a7306SBarry Smith }
1453bb5a7306SBarry Smith 
14540ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14550ac07820SSatish Balay 
145679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
145779bdfe76SSatish Balay static struct _MatOps MatOps = {
1458bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14594c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14604c50302cSBarry Smith   0,0,0,0,
14610ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14620e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
146358667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14644c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14654c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14664c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
146794a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1468d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1469ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1470bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1471ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
147279bdfe76SSatish Balay 
147379bdfe76SSatish Balay 
14745615d1e5SSatish Balay #undef __FUNC__
14755615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
147679bdfe76SSatish Balay /*@C
147779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
147879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
147979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
148079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
148179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
148279bdfe76SSatish Balay 
148379bdfe76SSatish Balay    Input Parameters:
148479bdfe76SSatish Balay .  comm - MPI communicator
148579bdfe76SSatish Balay .  bs   - size of blockk
148679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
148792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
148892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
148992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
149092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
149192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
149279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
149392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
149479bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
149579bdfe76SSatish Balay            submatrix  (same for all local rows)
149692e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
149792e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
149892e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
149992e8d321SLois Curfman McInnes            it is zero.
150092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
150179bdfe76SSatish Balay            submatrix (same for all local rows).
150292e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
150392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
150492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
150579bdfe76SSatish Balay 
150679bdfe76SSatish Balay    Output Parameter:
150779bdfe76SSatish Balay .  A - the matrix
150879bdfe76SSatish Balay 
150979bdfe76SSatish Balay    Notes:
151079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
151179bdfe76SSatish Balay    (possibly both).
151279bdfe76SSatish Balay 
151379bdfe76SSatish Balay    Storage Information:
151479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
151579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
151679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
151779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
151879bdfe76SSatish Balay 
151979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
152079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
152179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
152279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
152379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
152479bdfe76SSatish Balay 
152579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
152679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
152779bdfe76SSatish Balay 
152879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
152979bdfe76SSatish Balay $         -------------------
153079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
153179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
153279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
153379bdfe76SSatish Balay $         -------------------
153479bdfe76SSatish Balay $
153579bdfe76SSatish Balay 
153679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
153779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
153879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
153957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
154079bdfe76SSatish Balay 
154179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
154279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
154379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
154492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
154592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15466da5968aSLois Curfman McInnes    matrices.
154779bdfe76SSatish Balay 
154892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
154979bdfe76SSatish Balay 
155079bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
155179bdfe76SSatish Balay @*/
155279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
155379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
155479bdfe76SSatish Balay {
155579bdfe76SSatish Balay   Mat          B;
155679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15574c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
155879bdfe76SSatish Balay 
15593914022bSBarry Smith   if (bs < 1) SETERRQ(1,0,"Invalid block size specified, must be positive");
15603914022bSBarry Smith 
15613914022bSBarry Smith   MPI_Comm_size(comm,&size);
15623914022bSBarry Smith   if (size == 1) {
15633914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
15643914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
15653914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
15663914022bSBarry Smith     return 0;
15673914022bSBarry Smith   }
15683914022bSBarry Smith 
156979bdfe76SSatish Balay   *A = 0;
1570f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
157179bdfe76SSatish Balay   PLogObjectCreate(B);
157279bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
157379bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
157479bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15754c50302cSBarry Smith 
157679bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
157779bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
157890f02eecSBarry Smith   B->mapping    = 0;
157979bdfe76SSatish Balay   B->factor     = 0;
158079bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
158179bdfe76SSatish Balay 
1582e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
158379bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
158479bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
158579bdfe76SSatish Balay 
158679bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1587e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1588e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1589e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1590cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1591cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
159279bdfe76SSatish Balay 
159379bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
159479bdfe76SSatish Balay     work[0] = m; work[1] = n;
159579bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
159679bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
159779bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
159879bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
159979bdfe76SSatish Balay   }
160079bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
160179bdfe76SSatish Balay     Mbs = M/bs;
1602e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
160379bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
160479bdfe76SSatish Balay     m   = mbs*bs;
160579bdfe76SSatish Balay   }
160679bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
160779bdfe76SSatish Balay     Nbs = N/bs;
1608e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
160979bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
161079bdfe76SSatish Balay     n   = nbs*bs;
161179bdfe76SSatish Balay   }
1612e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
161379bdfe76SSatish Balay 
161479bdfe76SSatish Balay   b->m = m; B->m = m;
161579bdfe76SSatish Balay   b->n = n; B->n = n;
161679bdfe76SSatish Balay   b->N = N; B->N = N;
161779bdfe76SSatish Balay   b->M = M; B->M = M;
161879bdfe76SSatish Balay   b->bs  = bs;
161979bdfe76SSatish Balay   b->bs2 = bs*bs;
162079bdfe76SSatish Balay   b->mbs = mbs;
162179bdfe76SSatish Balay   b->nbs = nbs;
162279bdfe76SSatish Balay   b->Mbs = Mbs;
162379bdfe76SSatish Balay   b->Nbs = Nbs;
162479bdfe76SSatish Balay 
162579bdfe76SSatish Balay   /* build local table of row and column ownerships */
162679bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1627f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16280ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
162979bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
163079bdfe76SSatish Balay   b->rowners[0] = 0;
163179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
163279bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
163379bdfe76SSatish Balay   }
163479bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
163579bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16364fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16374fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16384fa0d573SSatish Balay 
163979bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
164079bdfe76SSatish Balay   b->cowners[0] = 0;
164179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
164279bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
164379bdfe76SSatish Balay   }
164479bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
164579bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16464fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16474fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
164879bdfe76SSatish Balay 
164979bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1650029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
165179bdfe76SSatish Balay   PLogObjectParent(B,b->A);
165279bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1653029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
165479bdfe76SSatish Balay   PLogObjectParent(B,b->B);
165579bdfe76SSatish Balay 
165679bdfe76SSatish Balay   /* build cache for off array entries formed */
165779bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
165890f02eecSBarry Smith   b->donotstash  = 0;
165979bdfe76SSatish Balay   b->colmap      = 0;
166079bdfe76SSatish Balay   b->garray      = 0;
166179bdfe76SSatish Balay   b->roworiented = 1;
166279bdfe76SSatish Balay 
166330793edcSSatish Balay   /* stuff used in block assembly */
166430793edcSSatish Balay   b->barray      = 0;
166530793edcSSatish Balay 
166679bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
166779bdfe76SSatish Balay   b->lvec        = 0;
166879bdfe76SSatish Balay   b->Mvctx       = 0;
166979bdfe76SSatish Balay 
167079bdfe76SSatish Balay   /* stuff for MatGetRow() */
167179bdfe76SSatish Balay   b->rowindices   = 0;
167279bdfe76SSatish Balay   b->rowvalues    = 0;
167379bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
167479bdfe76SSatish Balay 
167579bdfe76SSatish Balay   *A = B;
167679bdfe76SSatish Balay   return 0;
167779bdfe76SSatish Balay }
1678026e39d0SSatish Balay 
16795615d1e5SSatish Balay #undef __FUNC__
16805615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16810ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16820ac07820SSatish Balay {
16830ac07820SSatish Balay   Mat         mat;
16840ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16850ac07820SSatish Balay   int         ierr, len=0, flg;
16860ac07820SSatish Balay 
16870ac07820SSatish Balay   *newmat       = 0;
1688f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16890ac07820SSatish Balay   PLogObjectCreate(mat);
16900ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16910ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16920ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16930ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16940ac07820SSatish Balay   mat->factor     = matin->factor;
16950ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16960ac07820SSatish Balay 
16970ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16980ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16990ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
17000ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
17010ac07820SSatish Balay 
17020ac07820SSatish Balay   a->bs  = oldmat->bs;
17030ac07820SSatish Balay   a->bs2 = oldmat->bs2;
17040ac07820SSatish Balay   a->mbs = oldmat->mbs;
17050ac07820SSatish Balay   a->nbs = oldmat->nbs;
17060ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
17070ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
17080ac07820SSatish Balay 
17090ac07820SSatish Balay   a->rstart       = oldmat->rstart;
17100ac07820SSatish Balay   a->rend         = oldmat->rend;
17110ac07820SSatish Balay   a->cstart       = oldmat->cstart;
17120ac07820SSatish Balay   a->cend         = oldmat->cend;
17130ac07820SSatish Balay   a->size         = oldmat->size;
17140ac07820SSatish Balay   a->rank         = oldmat->rank;
1715e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
17160ac07820SSatish Balay   a->rowvalues    = 0;
17170ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
171830793edcSSatish Balay   a->barray       = 0;
17190ac07820SSatish Balay 
17200ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1721f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
17220ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
17230ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
17240ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17250ac07820SSatish Balay   if (oldmat->colmap) {
17260ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
17270ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17280ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17290ac07820SSatish Balay   } else a->colmap = 0;
17300ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17310ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17320ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17330ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17340ac07820SSatish Balay   } else a->garray = 0;
17350ac07820SSatish Balay 
17360ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17370ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17380ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17390ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17400ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17410ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17420ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17430ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17440ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17450ac07820SSatish Balay   if (flg) {
17460ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17470ac07820SSatish Balay   }
17480ac07820SSatish Balay   *newmat = mat;
17490ac07820SSatish Balay   return 0;
17500ac07820SSatish Balay }
175157b952d6SSatish Balay 
175257b952d6SSatish Balay #include "sys.h"
175357b952d6SSatish Balay 
17545615d1e5SSatish Balay #undef __FUNC__
17555615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
175657b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
175757b952d6SSatish Balay {
175857b952d6SSatish Balay   Mat          A;
175957b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
176057b952d6SSatish Balay   Scalar       *vals,*buf;
176157b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
176257b952d6SSatish Balay   MPI_Status   status;
1763cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
176457b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
176557b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
176657b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
176757b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
176857b952d6SSatish Balay 
176957b952d6SSatish Balay 
177057b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
177157b952d6SSatish Balay   bs2  = bs*bs;
177257b952d6SSatish Balay 
177357b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
177457b952d6SSatish Balay   if (!rank) {
177557b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
177657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1777e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
177857b952d6SSatish Balay   }
177957b952d6SSatish Balay 
178057b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
178157b952d6SSatish Balay   M = header[1]; N = header[2];
178257b952d6SSatish Balay 
1783e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
178457b952d6SSatish Balay 
178557b952d6SSatish Balay   /*
178657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
178757b952d6SSatish Balay      divisible by the blocksize
178857b952d6SSatish Balay   */
178957b952d6SSatish Balay   Mbs        = M/bs;
179057b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
179157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
179257b952d6SSatish Balay   else                  Mbs++;
179357b952d6SSatish Balay   if (extra_rows &&!rank) {
1794b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
179557b952d6SSatish Balay   }
1796537820f0SBarry Smith 
179757b952d6SSatish Balay   /* determine ownership of all rows */
179857b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
179957b952d6SSatish Balay   m   = mbs * bs;
1800cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1801cee3aa6bSSatish Balay   browners = rowners + size + 1;
180257b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
180357b952d6SSatish Balay   rowners[0] = 0;
1804cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1805cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
180657b952d6SSatish Balay   rstart = rowners[rank];
180757b952d6SSatish Balay   rend   = rowners[rank+1];
180857b952d6SSatish Balay 
180957b952d6SSatish Balay   /* distribute row lengths to all processors */
181057b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
181157b952d6SSatish Balay   if (!rank) {
181257b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
181357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
181457b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
181557b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1816cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1817cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
181857b952d6SSatish Balay     PetscFree(sndcounts);
181957b952d6SSatish Balay   }
182057b952d6SSatish Balay   else {
182157b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
182257b952d6SSatish Balay   }
182357b952d6SSatish Balay 
182457b952d6SSatish Balay   if (!rank) {
182557b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
182657b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
182757b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
182857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
182957b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
183057b952d6SSatish Balay         procsnz[i] += rowlengths[j];
183157b952d6SSatish Balay       }
183257b952d6SSatish Balay     }
183357b952d6SSatish Balay     PetscFree(rowlengths);
183457b952d6SSatish Balay 
183557b952d6SSatish Balay     /* determine max buffer needed and allocate it */
183657b952d6SSatish Balay     maxnz = 0;
183757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
183857b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
183957b952d6SSatish Balay     }
184057b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
184157b952d6SSatish Balay 
184257b952d6SSatish Balay     /* read in my part of the matrix column indices  */
184357b952d6SSatish Balay     nz = procsnz[0];
184457b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
184557b952d6SSatish Balay     mycols = ibuf;
1846cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
184757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1848cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1849cee3aa6bSSatish Balay 
185057b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
185157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
185257b952d6SSatish Balay       nz = procsnz[i];
185357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
185457b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
185557b952d6SSatish Balay     }
185657b952d6SSatish Balay     /* read in the stuff for the last proc */
185757b952d6SSatish Balay     if ( size != 1 ) {
185857b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
185957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
186057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1861cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
186257b952d6SSatish Balay     }
186357b952d6SSatish Balay     PetscFree(cols);
186457b952d6SSatish Balay   }
186557b952d6SSatish Balay   else {
186657b952d6SSatish Balay     /* determine buffer space needed for message */
186757b952d6SSatish Balay     nz = 0;
186857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
186957b952d6SSatish Balay       nz += locrowlens[i];
187057b952d6SSatish Balay     }
187157b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
187257b952d6SSatish Balay     mycols = ibuf;
187357b952d6SSatish Balay     /* receive message of column indices*/
187457b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
187557b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1876e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
187757b952d6SSatish Balay   }
187857b952d6SSatish Balay 
187957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1880cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1881cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
188257b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1883cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
188457b952d6SSatish Balay   masked1 = mask    + Mbs;
188557b952d6SSatish Balay   masked2 = masked1 + Mbs;
188657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
188757b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
188857b952d6SSatish Balay     dcount  = 0;
188957b952d6SSatish Balay     odcount = 0;
189057b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
189157b952d6SSatish Balay       kmax = locrowlens[rowcount];
189257b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
189357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
189457b952d6SSatish Balay         if (!mask[tmp]) {
189557b952d6SSatish Balay           mask[tmp] = 1;
189657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
189757b952d6SSatish Balay           else masked1[dcount++] = tmp;
189857b952d6SSatish Balay         }
189957b952d6SSatish Balay       }
190057b952d6SSatish Balay       rowcount++;
190157b952d6SSatish Balay     }
1902cee3aa6bSSatish Balay 
190357b952d6SSatish Balay     dlens[i]  = dcount;
190457b952d6SSatish Balay     odlens[i] = odcount;
1905cee3aa6bSSatish Balay 
190657b952d6SSatish Balay     /* zero out the mask elements we set */
190757b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
190857b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
190957b952d6SSatish Balay   }
1910cee3aa6bSSatish Balay 
191157b952d6SSatish Balay   /* create our matrix */
1912537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1913537820f0SBarry Smith          CHKERRQ(ierr);
191457b952d6SSatish Balay   A = *newmat;
19156d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
191657b952d6SSatish Balay 
191757b952d6SSatish Balay   if (!rank) {
191857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
191957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
192057b952d6SSatish Balay     nz = procsnz[0];
192157b952d6SSatish Balay     vals = buf;
1922cee3aa6bSSatish Balay     mycols = ibuf;
1923cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
192457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1925cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1926537820f0SBarry Smith 
192757b952d6SSatish Balay     /* insert into matrix */
192857b952d6SSatish Balay     jj      = rstart*bs;
192957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
193057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
193157b952d6SSatish Balay       mycols += locrowlens[i];
193257b952d6SSatish Balay       vals   += locrowlens[i];
193357b952d6SSatish Balay       jj++;
193457b952d6SSatish Balay     }
193557b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
193657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
193757b952d6SSatish Balay       nz = procsnz[i];
193857b952d6SSatish Balay       vals = buf;
193957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
194057b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
194157b952d6SSatish Balay     }
194257b952d6SSatish Balay     /* the last proc */
194357b952d6SSatish Balay     if ( size != 1 ){
194457b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1945cee3aa6bSSatish Balay       vals = buf;
194657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
194757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1948cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
194957b952d6SSatish Balay     }
195057b952d6SSatish Balay     PetscFree(procsnz);
195157b952d6SSatish Balay   }
195257b952d6SSatish Balay   else {
195357b952d6SSatish Balay     /* receive numeric values */
195457b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
195557b952d6SSatish Balay 
195657b952d6SSatish Balay     /* receive message of values*/
195757b952d6SSatish Balay     vals = buf;
1958cee3aa6bSSatish Balay     mycols = ibuf;
195957b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
196057b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1961e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
196257b952d6SSatish Balay 
196357b952d6SSatish Balay     /* insert into matrix */
196457b952d6SSatish Balay     jj      = rstart*bs;
1965cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
196657b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
196757b952d6SSatish Balay       mycols += locrowlens[i];
196857b952d6SSatish Balay       vals   += locrowlens[i];
196957b952d6SSatish Balay       jj++;
197057b952d6SSatish Balay     }
197157b952d6SSatish Balay   }
197257b952d6SSatish Balay   PetscFree(locrowlens);
197357b952d6SSatish Balay   PetscFree(buf);
197457b952d6SSatish Balay   PetscFree(ibuf);
197557b952d6SSatish Balay   PetscFree(rowners);
197657b952d6SSatish Balay   PetscFree(dlens);
1977cee3aa6bSSatish Balay   PetscFree(mask);
19786d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19796d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
198057b952d6SSatish Balay   return 0;
198157b952d6SSatish Balay }
198257b952d6SSatish Balay 
198357b952d6SSatish Balay 
1984