xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d0a415805d4ada8adc754cff5723cc7354ee7166)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*d0a41580SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.95 1998/01/12 17:17:31 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
143b2fbd54SBarry Smith 
15537820f0SBarry Smith /*
16537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay    length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
215615d1e5SSatish Balay #undef __FUNC__
225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2457b952d6SSatish Balay {
2557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2857b952d6SSatish Balay 
29d64ed03dSBarry Smith   PetscFunctionBegin;
30758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3257b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
343a40ed3dSBarry Smith   PetscFunctionReturn(0);
3557b952d6SSatish Balay }
3657b952d6SSatish Balay 
3780c1aa95SSatish Balay #define CHUNKSIZE  10
3880c1aa95SSatish Balay 
39f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4080c1aa95SSatish Balay { \
4180c1aa95SSatish Balay  \
4280c1aa95SSatish Balay     brow = row/bs;  \
4380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4580c1aa95SSatish Balay       bcol = col/bs; \
4680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
47ab26458aSBarry Smith       low = 0; high = nrow; \
48ab26458aSBarry Smith       while (high-low > 3) { \
49ab26458aSBarry Smith         t = (low+high)/2; \
50ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
51ab26458aSBarry Smith         else              low  = t; \
52ab26458aSBarry Smith       } \
53ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
58eada6651SSatish Balay           else                    *bap  = value;  \
59ac7a638eSSatish Balay           goto a_noinsert; \
6080c1aa95SSatish Balay         } \
6180c1aa95SSatish Balay       } \
6289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
63a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
6480c1aa95SSatish Balay       if (nrow >= rmax) { \
6580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6680c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6780c1aa95SSatish Balay         Scalar *new_a; \
6880c1aa95SSatish Balay  \
69a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7089280ab3SLois Curfman McInnes  \
7180c1aa95SSatish Balay         /* malloc new storage space */ \
7280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7680c1aa95SSatish Balay  \
7780c1aa95SSatish Balay         /* copy over old data into new slots */ \
7880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8380c1aa95SSatish Balay                                                            len*sizeof(int)); \
8480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8880c1aa95SSatish Balay         /* free up old matrix storage */ \
8980c1aa95SSatish Balay         PetscFree(a->a);  \
9080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9280c1aa95SSatish Balay         a->singlemalloc = 1; \
9380c1aa95SSatish Balay  \
9480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9880c1aa95SSatish Balay         a->reallocs++; \
9980c1aa95SSatish Balay         a->nz++; \
10080c1aa95SSatish Balay       } \
10180c1aa95SSatish Balay       N = nrow++ - 1;  \
10280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
110ac7a638eSSatish Balay       a_noinsert:; \
11180c1aa95SSatish Balay     ailen[brow] = nrow; \
11280c1aa95SSatish Balay }
11357b952d6SSatish Balay 
114ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115ac7a638eSSatish Balay { \
116ac7a638eSSatish Balay  \
117ac7a638eSSatish Balay     brow = row/bs;  \
118ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
120ac7a638eSSatish Balay       bcol = col/bs; \
121ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
122ac7a638eSSatish Balay       low = 0; high = nrow; \
123ac7a638eSSatish Balay       while (high-low > 3) { \
124ac7a638eSSatish Balay         t = (low+high)/2; \
125ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
126ac7a638eSSatish Balay         else              low  = t; \
127ac7a638eSSatish Balay       } \
128ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
129ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
130ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
131ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
133ac7a638eSSatish Balay           else                    *bap  = value;  \
134ac7a638eSSatish Balay           goto b_noinsert; \
135ac7a638eSSatish Balay         } \
136ac7a638eSSatish Balay       } \
13789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
138a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139ac7a638eSSatish Balay       if (nrow >= rmax) { \
140ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14174c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142ac7a638eSSatish Balay         Scalar *new_a; \
143ac7a638eSSatish Balay  \
144a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
14589280ab3SLois Curfman McInnes  \
146ac7a638eSSatish Balay         /* malloc new storage space */ \
14774c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
150ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
151ac7a638eSSatish Balay  \
152ac7a638eSSatish Balay         /* copy over old data into new slots */ \
153ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15474c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158ac7a638eSSatish Balay                                                            len*sizeof(int)); \
159ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163ac7a638eSSatish Balay         /* free up old matrix storage */ \
16474c639caSSatish Balay         PetscFree(b->a);  \
16574c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16774c639caSSatish Balay         b->singlemalloc = 1; \
168ac7a638eSSatish Balay  \
169ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17174c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17374c639caSSatish Balay         b->reallocs++; \
17474c639caSSatish Balay         b->nz++; \
175ac7a638eSSatish Balay       } \
176ac7a638eSSatish Balay       N = nrow++ - 1;  \
177ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
178ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
179ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
180ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
184ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
185ac7a638eSSatish Balay       b_noinsert:; \
186ac7a638eSSatish Balay     bilen[brow] = nrow; \
187ac7a638eSSatish Balay }
188ac7a638eSSatish Balay 
1895615d1e5SSatish Balay #undef __FUNC__
1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   Scalar      value;
1954fa0d573SSatish Balay   int         ierr,i,j,row,col;
1964fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1974fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1984fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19957b952d6SSatish Balay 
200eada6651SSatish Balay   /* Some Variables required in the macro */
20180c1aa95SSatish Balay   Mat         A = baij->A;
20280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204ac7a638eSSatish Balay   Scalar      *aa=a->a;
205ac7a638eSSatish Balay 
206ac7a638eSSatish Balay   Mat         B = baij->B;
207ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209ac7a638eSSatish Balay   Scalar      *ba=b->a;
210ac7a638eSSatish Balay 
211ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
213ac7a638eSSatish Balay   Scalar      *ap,*bap;
21480c1aa95SSatish Balay 
215d64ed03dSBarry Smith   PetscFunctionBegin;
21657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
218a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220639f9d9dSBarry Smith #endif
22157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22257b952d6SSatish Balay       row = im[i] - rstart_orig;
22357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22557b952d6SSatish Balay           col = in[j] - cstart_orig;
22657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22957b952d6SSatish Balay         }
2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
231a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233639f9d9dSBarry Smith #endif
23457b952d6SSatish Balay         else {
23557b952d6SSatish Balay           if (mat->was_assembled) {
236905e6a2fSBarry Smith             if (!baij->colmap) {
237905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238905e6a2fSBarry Smith             }
239905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
24057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24257b952d6SSatish Balay               col =  in[j];
2439bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2449bf004c3SSatish Balay               B = baij->B;
2459bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2469bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2479bf004c3SSatish Balay               ba=b->a;
24857b952d6SSatish Balay             }
249d64ed03dSBarry Smith           } else col = in[j];
25057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
255d64ed03dSBarry Smith     } else {
25690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258d64ed03dSBarry Smith       } else {
25990f02eecSBarry Smith         if (!baij->donotstash) {
26057b952d6SSatish Balay           row = im[i];
26157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26357b952d6SSatish Balay           }
26457b952d6SSatish Balay         }
26557b952d6SSatish Balay       }
26657b952d6SSatish Balay     }
26790f02eecSBarry Smith   }
2683a40ed3dSBarry Smith   PetscFunctionReturn(0);
26957b952d6SSatish Balay }
27057b952d6SSatish Balay 
271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272ab26458aSBarry Smith #undef __FUNC__
273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275ab26458aSBarry Smith {
276ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
278abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
279ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
281ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282ab26458aSBarry Smith 
28330793edcSSatish Balay   if(!barray) {
28447513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
287ab26458aSBarry Smith   if (roworiented) {
288ab26458aSBarry Smith     stepval = (n-1)*bs;
289ab26458aSBarry Smith   } else {
290ab26458aSBarry Smith     stepval = (m-1)*bs;
291ab26458aSBarry Smith   }
292ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
294a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296ab26458aSBarry Smith #endif
297ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
298ab26458aSBarry Smith       row = im[i] - rstart;
299ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30215b57d14SSatish Balay           barray = v + i*bs2;
30315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30415b57d14SSatish Balay           barray = v + j*bs2;
30515b57d14SSatish Balay         } else { /* Here a copy is required */
306ab26458aSBarry Smith           if (roworiented) {
307ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
308ab26458aSBarry Smith           } else {
309ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
310abef11f7SSatish Balay           }
31147513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31247513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31330793edcSSatish Balay               *barray++  = *value++;
31447513183SBarry Smith             }
31547513183SBarry Smith           }
31630793edcSSatish Balay           barray -=bs2;
31715b57d14SSatish Balay         }
318abef11f7SSatish Balay 
319abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
320abef11f7SSatish Balay           col  = in[j] - cstart;
32130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322ab26458aSBarry Smith         }
3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
324a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326ab26458aSBarry Smith #endif
327ab26458aSBarry Smith         else {
328ab26458aSBarry Smith           if (mat->was_assembled) {
329ab26458aSBarry Smith             if (!baij->colmap) {
330ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331ab26458aSBarry Smith             }
332a5eb4965SSatish Balay 
3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
334a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335a5eb4965SSatish Balay #endif
336a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
337ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339ab26458aSBarry Smith               col =  in[j];
340ab26458aSBarry Smith             }
341ab26458aSBarry Smith           }
342ab26458aSBarry Smith           else col = in[j];
34330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344ab26458aSBarry Smith         }
345ab26458aSBarry Smith       }
346d64ed03dSBarry Smith     } else {
347ab26458aSBarry Smith       if (!baij->donotstash) {
348ab26458aSBarry Smith         if (roworiented ) {
349abef11f7SSatish Balay           row   = im[i]*bs;
350abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
351abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
352abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
353abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355abef11f7SSatish Balay               }
356ab26458aSBarry Smith             }
357ab26458aSBarry Smith           }
358d64ed03dSBarry Smith         } else {
359ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
360abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
361abef11f7SSatish Balay             col   = in[j]*bs;
362abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
363abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365ab26458aSBarry Smith               }
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith           }
368abef11f7SSatish Balay         }
369abef11f7SSatish Balay       }
370ab26458aSBarry Smith     }
371ab26458aSBarry Smith   }
3723a40ed3dSBarry Smith   PetscFunctionReturn(0);
373ab26458aSBarry Smith }
3740bdbc534SSatish Balay #include <math.h>
3750bdbc534SSatish Balay #define HASH_KEY 0.6180339887
3760bdbc534SSatish Balay #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1)))
377ab26458aSBarry Smith 
3785615d1e5SSatish Balay #undef __FUNC__
3790bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3800bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3810bdbc534SSatish Balay {
3820bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3830bdbc534SSatish Balay   int         ierr,i,j,row,col;
3840bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
3850bdbc534SSatish Balay   int         rend_orig=baij->rend_bs;
3860bdbc534SSatish Balay 
3870bdbc534SSatish Balay   int         h1,key,size=baij->ht_size,k,bs=baij->bs;
3880bdbc534SSatish Balay   double      * HT  = baij->ht;
3890bdbc534SSatish Balay   Scalar      ** HD = (Scalar **)(HT + size),value;
3900bdbc534SSatish Balay 
3910bdbc534SSatish Balay 
3920bdbc534SSatish Balay   PetscFunctionBegin;
3930bdbc534SSatish Balay 
3940bdbc534SSatish Balay   if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
3950bdbc534SSatish Balay 
3960bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
3970bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
3980bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
3990bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4000bdbc534SSatish Balay #endif
4010bdbc534SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
4020bdbc534SSatish Balay       row = im[i];
4030bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4040bdbc534SSatish Balay         col = in[j];
4050bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4060bdbc534SSatish Balay         /* Look up into the Hash Table */
4070bdbc534SSatish Balay         key = (row/bs)*baij->Nbs+(col/bs)+1;
4080bdbc534SSatish Balay         h1  = HASH1(size,key);
4090bdbc534SSatish Balay 
4100bdbc534SSatish Balay         for ( k=0; k<size; k++ ){
4110bdbc534SSatish Balay           if (HT[(h1+k)%size] == key) {
4120bdbc534SSatish Balay             if (addv == ADD_VALUES) *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs) += value;
4130bdbc534SSatish Balay             else                    *(HD[(h1+k)%size]+ (col % bs)*bs + row % bs)  = value;
4140bdbc534SSatish Balay             break;
4150bdbc534SSatish Balay           }
4160bdbc534SSatish Balay         }
4170bdbc534SSatish Balay         if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found");
4180bdbc534SSatish Balay       }
4190bdbc534SSatish Balay     } else {
4200bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4210bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4220bdbc534SSatish Balay       } else {
4230bdbc534SSatish Balay         if (!baij->donotstash) {
4240bdbc534SSatish Balay           row = im[i];
4250bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4260bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4270bdbc534SSatish Balay           }
4280bdbc534SSatish Balay         }
4290bdbc534SSatish Balay       }
4300bdbc534SSatish Balay     }
4310bdbc534SSatish Balay   }
4320bdbc534SSatish Balay   PetscFunctionReturn(0);
4330bdbc534SSatish Balay }
4340bdbc534SSatish Balay 
4350bdbc534SSatish Balay #undef __FUNC__
4360bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4370bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4380bdbc534SSatish Balay {
4390bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4400bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4410bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
442b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
4430bdbc534SSatish Balay 
4440bdbc534SSatish Balay   int         h1,key,size=baij->ht_size;
4450bdbc534SSatish Balay   double      * HT  = baij->ht;
4460bdbc534SSatish Balay   Scalar      ** HD = (Scalar **)(HT + size),*value,*baij_a;
4470bdbc534SSatish Balay 
448*d0a41580SSatish Balay   PetscFunctionBegin;
449*d0a41580SSatish Balay 
450*d0a41580SSatish Balay   if(((Mat_SeqBAIJ*)baij->A->data)->nonew !=1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
4510bdbc534SSatish Balay 
4520bdbc534SSatish Balay   if (roworiented) {
4530bdbc534SSatish Balay     stepval = (n-1)*bs;
4540bdbc534SSatish Balay   } else {
4550bdbc534SSatish Balay     stepval = (m-1)*bs;
4560bdbc534SSatish Balay   }
4570bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4580bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4590bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4600bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4610bdbc534SSatish Balay #endif
4620bdbc534SSatish Balay     if (im[i] >= rstart && im[i] < rend) {
4630bdbc534SSatish Balay       row = im[i];
4640bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4650bdbc534SSatish Balay         col = in[j];
4660bdbc534SSatish Balay 
4670bdbc534SSatish Balay         /* Look up into the Hash Table */
4680bdbc534SSatish Balay         key = row*baij->Nbs+col+1;
4690bdbc534SSatish Balay         h1  = HASH1(size,key);
4700bdbc534SSatish Balay 
4710bdbc534SSatish Balay         for ( k=0; k<size; k++ ){
4720bdbc534SSatish Balay           if (HT[(h1+k)%size] == key) {
4730bdbc534SSatish Balay             baij_a = HD[(h1+k)%size];
4740bdbc534SSatish Balay 
4750bdbc534SSatish Balay             if (roworiented) {
4760bdbc534SSatish Balay               value = v + i*(stepval+bs)*bs + j*bs;
477b4cc0f5aSSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval,baij_a++ ) {
478b4cc0f5aSSatish Balay                 for ( jj=0; jj<bs2; jj+=bs ) {
479b4cc0f5aSSatish Balay                   if (addv == ADD_VALUES) baij_a[jj]  += *value++;
480b4cc0f5aSSatish Balay                   else                    baij_a[jj]   = *value++;
481b4cc0f5aSSatish Balay                 }
482b4cc0f5aSSatish Balay               }
483b4cc0f5aSSatish Balay 
4840bdbc534SSatish Balay             } else {
4850bdbc534SSatish Balay               value = v + j*(stepval+bs)*bs + i*bs;
486b4cc0f5aSSatish Balay               for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
4870bdbc534SSatish Balay                 for ( jj=0; jj<bs; jj++ ) {
488b4cc0f5aSSatish Balay                   if (addv == ADD_VALUES) baij_a[jj]  += *value++;
489b4cc0f5aSSatish Balay                   else                    baij_a[jj]   = *value++;
490b4cc0f5aSSatish Balay                 }
4910bdbc534SSatish Balay               }
4920bdbc534SSatish Balay             }
4930bdbc534SSatish Balay             break;
4940bdbc534SSatish Balay           }
4950bdbc534SSatish Balay         }
4960bdbc534SSatish Balay         if ( k==size) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Hastable Entry not found");
4970bdbc534SSatish Balay       }
4980bdbc534SSatish Balay     } else {
4990bdbc534SSatish Balay       if (!baij->donotstash) {
5000bdbc534SSatish Balay         if (roworiented ) {
5010bdbc534SSatish Balay           row   = im[i]*bs;
5020bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5030bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5040bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5050bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5060bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5070bdbc534SSatish Balay               }
5080bdbc534SSatish Balay             }
5090bdbc534SSatish Balay           }
5100bdbc534SSatish Balay         } else {
5110bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5120bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5130bdbc534SSatish Balay             col   = in[j]*bs;
5140bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5150bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
5160bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5170bdbc534SSatish Balay               }
5180bdbc534SSatish Balay             }
5190bdbc534SSatish Balay           }
5200bdbc534SSatish Balay         }
5210bdbc534SSatish Balay       }
5220bdbc534SSatish Balay     }
5230bdbc534SSatish Balay   }
5240bdbc534SSatish Balay   PetscFunctionReturn(0);
5250bdbc534SSatish Balay }
5260bdbc534SSatish Balay #undef __FUNC__
5275615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
528ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
529d6de1c52SSatish Balay {
530d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
531d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
532d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
533d6de1c52SSatish Balay 
534d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
535a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
536a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
537d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
538d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
539d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
540a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
541a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
542d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
543d6de1c52SSatish Balay           col = idxn[j] - bscstart;
544d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
545d64ed03dSBarry Smith         } else {
546905e6a2fSBarry Smith           if (!baij->colmap) {
547905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
548905e6a2fSBarry Smith           }
549e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
550dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
551d9d09a02SSatish Balay           else {
552dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
553d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
554d6de1c52SSatish Balay           }
555d6de1c52SSatish Balay         }
556d6de1c52SSatish Balay       }
557d64ed03dSBarry Smith     } else {
558a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
559d6de1c52SSatish Balay     }
560d6de1c52SSatish Balay   }
5613a40ed3dSBarry Smith   PetscFunctionReturn(0);
562d6de1c52SSatish Balay }
563d6de1c52SSatish Balay 
5645615d1e5SSatish Balay #undef __FUNC__
5655615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
566ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
567d6de1c52SSatish Balay {
568d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
569d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
570acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
571d6de1c52SSatish Balay   double     sum = 0.0;
572d6de1c52SSatish Balay   Scalar     *v;
573d6de1c52SSatish Balay 
574d64ed03dSBarry Smith   PetscFunctionBegin;
575d6de1c52SSatish Balay   if (baij->size == 1) {
576d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
577d6de1c52SSatish Balay   } else {
578d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
579d6de1c52SSatish Balay       v = amat->a;
580d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
5813a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
582d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
583d6de1c52SSatish Balay #else
584d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
585d6de1c52SSatish Balay #endif
586d6de1c52SSatish Balay       }
587d6de1c52SSatish Balay       v = bmat->a;
588d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
5893a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
590d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
591d6de1c52SSatish Balay #else
592d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
593d6de1c52SSatish Balay #endif
594d6de1c52SSatish Balay       }
595ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
596d6de1c52SSatish Balay       *norm = sqrt(*norm);
597d64ed03dSBarry Smith     } else {
598e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
599d6de1c52SSatish Balay     }
600d64ed03dSBarry Smith   }
6013a40ed3dSBarry Smith   PetscFunctionReturn(0);
602d6de1c52SSatish Balay }
60357b952d6SSatish Balay 
6045615d1e5SSatish Balay #undef __FUNC__
6055615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
606ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
60757b952d6SSatish Balay {
60857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
60957b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
61057b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
61157b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
61257b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
61357b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
61457b952d6SSatish Balay   InsertMode  addv;
61557b952d6SSatish Balay   Scalar      *rvalues,*svalues;
61657b952d6SSatish Balay 
617d64ed03dSBarry Smith   PetscFunctionBegin;
61857b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
619ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
62057b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
621a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
62257b952d6SSatish Balay   }
623e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
62457b952d6SSatish Balay 
62557b952d6SSatish Balay   /*  first count number of contributors to each processor */
62657b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
62757b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
62857b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
62957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
63057b952d6SSatish Balay     idx = baij->stash.idx[i];
63157b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
63257b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
63357b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
63457b952d6SSatish Balay       }
63557b952d6SSatish Balay     }
63657b952d6SSatish Balay   }
63757b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
63857b952d6SSatish Balay 
63957b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
64057b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
641ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
64257b952d6SSatish Balay   nreceives = work[rank];
643ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
64457b952d6SSatish Balay   nmax      = work[rank];
64557b952d6SSatish Balay   PetscFree(work);
64657b952d6SSatish Balay 
64757b952d6SSatish Balay   /* post receives:
64857b952d6SSatish Balay        1) each message will consist of ordered pairs
64957b952d6SSatish Balay      (global index,value) we store the global index as a double
65057b952d6SSatish Balay      to simplify the message passing.
65157b952d6SSatish Balay        2) since we don't know how long each individual message is we
65257b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
65357b952d6SSatish Balay      this is a lot of wasted space.
65457b952d6SSatish Balay 
65557b952d6SSatish Balay 
65657b952d6SSatish Balay        This could be done better.
65757b952d6SSatish Balay   */
658f8abc2e8SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
659f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
66057b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
661ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
66257b952d6SSatish Balay   }
66357b952d6SSatish Balay 
66457b952d6SSatish Balay   /* do sends:
66557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
66657b952d6SSatish Balay          the ith processor
66757b952d6SSatish Balay   */
66857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
669d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
67057b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
67157b952d6SSatish Balay   starts[0] = 0;
67257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
67357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
67457b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
67557b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
67657b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
67757b952d6SSatish Balay   }
67857b952d6SSatish Balay   PetscFree(owner);
67957b952d6SSatish Balay   starts[0] = 0;
68057b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
68157b952d6SSatish Balay   count = 0;
68257b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
68357b952d6SSatish Balay     if (procs[i]) {
684ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
68557b952d6SSatish Balay     }
68657b952d6SSatish Balay   }
68757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
68857b952d6SSatish Balay 
68957b952d6SSatish Balay   /* Free cache space */
690cd1fa5fbSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n);
69157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
69257b952d6SSatish Balay 
69357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
69457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
69557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
69657b952d6SSatish Balay   baij->rmax       = nmax;
69757b952d6SSatish Balay 
6983a40ed3dSBarry Smith   PetscFunctionReturn(0);
69957b952d6SSatish Balay }
700bd7f49f5SSatish Balay 
701*d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
702596b8d2eSBarry Smith {
703596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
704596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
705596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7060bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7070bdbc534SSatish Balay   int         size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart;
7080bdbc534SSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col;
709a07cd24cSSatish Balay   double      *HT,key;
710a07cd24cSSatish Balay   extern int  PetscGlobalRank;
7110bdbc534SSatish Balay   Scalar      **HD;
712a07cd24cSSatish Balay   /*
713a07cd24cSSatish Balay      Scalar      *aa=a->a,*ba=b->a;
714596b8d2eSBarry Smith      static double *HT;
715596b8d2eSBarry Smith      static      int flag=1;
716a07cd24cSSatish Balay      */
717d64ed03dSBarry Smith   PetscFunctionBegin;
7180bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7190bdbc534SSatish Balay   size = baij->ht_size;
720596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
7210bdbc534SSatish Balay   if (baij->ht) {
7220bdbc534SSatish Balay     PetscFunctionReturn(0);
723596b8d2eSBarry Smith   }
7240bdbc534SSatish Balay 
7250bdbc534SSatish Balay   baij->ht = (double*)PetscMalloc(size*(sizeof(double)+sizeof(Scalar*))+1); CHKPTRQ(baij->ht);
726a07cd24cSSatish Balay   HT = baij->ht;
7270bdbc534SSatish Balay   HD = (Scalar**)(HT + size);
7280bdbc534SSatish Balay   PetscMemzero(HT,size*sizeof(double)+sizeof(Scalar*));
7290bdbc534SSatish Balay 
730596b8d2eSBarry Smith 
731596b8d2eSBarry Smith   /* Loop Over A */
7320bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
733596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7340bdbc534SSatish Balay       row = i+rstart;
7350bdbc534SSatish Balay       col = aj[j]+cstart;
736596b8d2eSBarry Smith 
7370bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
7380bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
7390bdbc534SSatish Balay       h1  = HASH1(size,key);
7400bdbc534SSatish Balay 
7410bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7420bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7430bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7440bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
745596b8d2eSBarry Smith           break;
746596b8d2eSBarry Smith         } else ct++;
747596b8d2eSBarry Smith       }
748bd7f49f5SSatish Balay       if (k> max1) max1 = k;
749596b8d2eSBarry Smith     }
750596b8d2eSBarry Smith   }
751596b8d2eSBarry Smith   /* Loop Over B */
7520bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
753596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7540bdbc534SSatish Balay       row = i+rstart;
7550bdbc534SSatish Balay       col = garray[bj[j]];
7560bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
7570bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
758596b8d2eSBarry Smith       h1  = HASH1(size,key);
7590bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7600bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7610bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7620bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
763596b8d2eSBarry Smith           break;
764596b8d2eSBarry Smith         } else ct++;
765596b8d2eSBarry Smith       }
766bd7f49f5SSatish Balay       if (k> max2) max2 = k;
767596b8d2eSBarry Smith     }
768596b8d2eSBarry Smith   }
769596b8d2eSBarry Smith 
770596b8d2eSBarry Smith   /* Print Summary */
771596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
772596b8d2eSBarry Smith     if (HT[i]) {j++;}
773596b8d2eSBarry Smith 
7740bdbc534SSatish Balay   /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n",
7750bdbc534SSatish Balay          PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */
7760bdbc534SSatish Balay   fflush(stdout);
7773a40ed3dSBarry Smith   PetscFunctionReturn(0);
778596b8d2eSBarry Smith }
77957b952d6SSatish Balay 
7805615d1e5SSatish Balay #undef __FUNC__
7815615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
782ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
78357b952d6SSatish Balay {
78457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
78557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
78657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
787596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
78857b952d6SSatish Balay   Scalar      *values,val;
789e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
79057b952d6SSatish Balay 
791d64ed03dSBarry Smith   PetscFunctionBegin;
79257b952d6SSatish Balay   /*  wait on receives */
79357b952d6SSatish Balay   while (count) {
794ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
79557b952d6SSatish Balay     /* unpack receives into our local space */
79657b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
797ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
79857b952d6SSatish Balay     n = n/3;
79957b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
80057b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
80157b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
80257b952d6SSatish Balay       val = values[3*i+2];
80357b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
80457b952d6SSatish Balay         col -= baij->cstart*bs;
8056fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
806d64ed03dSBarry Smith       } else {
80757b952d6SSatish Balay         if (mat->was_assembled) {
808905e6a2fSBarry Smith           if (!baij->colmap) {
809905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
810905e6a2fSBarry Smith           }
811a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
81257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
81357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
81457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
81557b952d6SSatish Balay           }
81657b952d6SSatish Balay         }
8176fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
81857b952d6SSatish Balay       }
81957b952d6SSatish Balay     }
82057b952d6SSatish Balay     count--;
82157b952d6SSatish Balay   }
82257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
82357b952d6SSatish Balay 
82457b952d6SSatish Balay   /* wait on sends */
82557b952d6SSatish Balay   if (baij->nsends) {
826d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
827ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
82857b952d6SSatish Balay     PetscFree(send_status);
82957b952d6SSatish Balay   }
83057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
83157b952d6SSatish Balay 
83257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
83357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
83457b952d6SSatish Balay 
83557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
83657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
837ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
83857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
83957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
84057b952d6SSatish Balay   }
84157b952d6SSatish Balay 
8426d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
84357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
84457b952d6SSatish Balay   }
84557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
84657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
84757b952d6SSatish Balay 
848596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
84944e6884eSSatish Balay   if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) {
850a07cd24cSSatish Balay     double fact = 1.39;
851*d0a41580SSatish Balay     MatCreateHashTable_MPIBAIJ_Private(mat,fact);
8520bdbc534SSatish Balay     mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
8530bdbc534SSatish Balay     mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
854bd7f49f5SSatish Balay   }
85557b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
8563a40ed3dSBarry Smith   PetscFunctionReturn(0);
85757b952d6SSatish Balay }
85857b952d6SSatish Balay 
8595615d1e5SSatish Balay #undef __FUNC__
8605615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
86157b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
86257b952d6SSatish Balay {
86357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
86457b952d6SSatish Balay   int          ierr;
86557b952d6SSatish Balay 
866d64ed03dSBarry Smith   PetscFunctionBegin;
86757b952d6SSatish Balay   if (baij->size == 1) {
86857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
869a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
8703a40ed3dSBarry Smith   PetscFunctionReturn(0);
87157b952d6SSatish Balay }
87257b952d6SSatish Balay 
8735615d1e5SSatish Balay #undef __FUNC__
8745615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
87557b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
87657b952d6SSatish Balay {
87757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
878cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
87957b952d6SSatish Balay   FILE         *fd;
88057b952d6SSatish Balay   ViewerType   vtype;
88157b952d6SSatish Balay 
882d64ed03dSBarry Smith   PetscFunctionBegin;
88357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
88457b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
88557b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
886639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
8874e220ebcSLois Curfman McInnes       MatInfo info;
88857b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
88957b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
8904e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
89157b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
89257b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
8934e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
8944e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
8954e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
8964e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
8974e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
8984e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
89957b952d6SSatish Balay       fflush(fd);
90057b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
90157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
9023a40ed3dSBarry Smith       PetscFunctionReturn(0);
903d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
904bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
9053a40ed3dSBarry Smith       PetscFunctionReturn(0);
90657b952d6SSatish Balay     }
90757b952d6SSatish Balay   }
90857b952d6SSatish Balay 
90957b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
91057b952d6SSatish Balay     Draw       draw;
91157b952d6SSatish Balay     PetscTruth isnull;
91257b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
9133a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
91457b952d6SSatish Balay   }
91557b952d6SSatish Balay 
91657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
91757b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
91857b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
91957b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
92057b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
92157b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
92257b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
92357b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
92457b952d6SSatish Balay     fflush(fd);
92557b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
926d64ed03dSBarry Smith   } else {
92757b952d6SSatish Balay     int size = baij->size;
92857b952d6SSatish Balay     rank = baij->rank;
92957b952d6SSatish Balay     if (size == 1) {
93057b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
931d64ed03dSBarry Smith     } else {
93257b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
93357b952d6SSatish Balay       Mat         A;
93457b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
93557b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
93657b952d6SSatish Balay       int         mbs=baij->mbs;
93757b952d6SSatish Balay       Scalar      *a;
93857b952d6SSatish Balay 
93957b952d6SSatish Balay       if (!rank) {
94055843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
941d64ed03dSBarry Smith       } else {
94255843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
94357b952d6SSatish Balay       }
94457b952d6SSatish Balay       PLogObjectParent(mat,A);
94557b952d6SSatish Balay 
94657b952d6SSatish Balay       /* copy over the A part */
94757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
94857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
94957b952d6SSatish Balay       row = baij->rstart;
95057b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
95157b952d6SSatish Balay 
95257b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
95357b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
95457b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
95557b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
95657b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
95757b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
958cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
959cee3aa6bSSatish Balay             col++; a += bs;
96057b952d6SSatish Balay           }
96157b952d6SSatish Balay         }
96257b952d6SSatish Balay       }
96357b952d6SSatish Balay       /* copy over the B part */
96457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
96557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
96657b952d6SSatish Balay       row = baij->rstart*bs;
96757b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
96857b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
96957b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
97057b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
97157b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
97257b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
973cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
974cee3aa6bSSatish Balay             col++; a += bs;
97557b952d6SSatish Balay           }
97657b952d6SSatish Balay         }
97757b952d6SSatish Balay       }
97857b952d6SSatish Balay       PetscFree(rvals);
9796d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
9806d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
98155843e3eSBarry Smith       /*
98255843e3eSBarry Smith          Everyone has to call to draw the matrix since the graphics waits are
98355843e3eSBarry Smith          synchronized across all processors that share the Draw object
98455843e3eSBarry Smith       */
98555843e3eSBarry Smith       if (!rank || vtype == DRAW_VIEWER) {
98657b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
98757b952d6SSatish Balay       }
98857b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
98957b952d6SSatish Balay     }
99057b952d6SSatish Balay   }
9913a40ed3dSBarry Smith   PetscFunctionReturn(0);
99257b952d6SSatish Balay }
99357b952d6SSatish Balay 
99457b952d6SSatish Balay 
99557b952d6SSatish Balay 
9965615d1e5SSatish Balay #undef __FUNC__
9975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
998ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
99957b952d6SSatish Balay {
100057b952d6SSatish Balay   Mat         mat = (Mat) obj;
100157b952d6SSatish Balay   int         ierr;
100257b952d6SSatish Balay   ViewerType  vtype;
100357b952d6SSatish Balay 
1004d64ed03dSBarry Smith   PetscFunctionBegin;
100557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
100657b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
100757b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
100857b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
10093a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
10103a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
101157b952d6SSatish Balay   }
10123a40ed3dSBarry Smith   PetscFunctionReturn(0);
101357b952d6SSatish Balay }
101457b952d6SSatish Balay 
10155615d1e5SSatish Balay #undef __FUNC__
10165615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1017ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
101879bdfe76SSatish Balay {
101979bdfe76SSatish Balay   Mat         mat = (Mat) obj;
102079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
102179bdfe76SSatish Balay   int         ierr;
102279bdfe76SSatish Balay 
1023d64ed03dSBarry Smith   PetscFunctionBegin;
10243a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
102579bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
102679bdfe76SSatish Balay #endif
102779bdfe76SSatish Balay 
102883e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
102979bdfe76SSatish Balay   PetscFree(baij->rowners);
103079bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
103179bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
103279bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
103379bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
103479bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
103579bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
103679bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
103730793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
10380bdbc534SSatish Balay   if (baij->ht) PetscFree(baij->ht);
103979bdfe76SSatish Balay   PetscFree(baij);
104079bdfe76SSatish Balay   PLogObjectDestroy(mat);
104179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
10423a40ed3dSBarry Smith   PetscFunctionReturn(0);
104379bdfe76SSatish Balay }
104479bdfe76SSatish Balay 
10455615d1e5SSatish Balay #undef __FUNC__
10465615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1047ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1048cee3aa6bSSatish Balay {
1049cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
105047b4a8eaSLois Curfman McInnes   int         ierr, nt;
1051cee3aa6bSSatish Balay 
1052d64ed03dSBarry Smith   PetscFunctionBegin;
1053c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
105447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1055a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
105647b4a8eaSLois Curfman McInnes   }
1057c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
105847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1059a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
106047b4a8eaSLois Curfman McInnes   }
106143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1062cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
106343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1064cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
106543a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
10663a40ed3dSBarry Smith   PetscFunctionReturn(0);
1067cee3aa6bSSatish Balay }
1068cee3aa6bSSatish Balay 
10695615d1e5SSatish Balay #undef __FUNC__
10705615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1071ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072cee3aa6bSSatish Balay {
1073cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1074cee3aa6bSSatish Balay   int        ierr;
1075d64ed03dSBarry Smith 
1076d64ed03dSBarry Smith   PetscFunctionBegin;
107743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1078cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
107943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1080cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
10813a40ed3dSBarry Smith   PetscFunctionReturn(0);
1082cee3aa6bSSatish Balay }
1083cee3aa6bSSatish Balay 
10845615d1e5SSatish Balay #undef __FUNC__
10855615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1086ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1087cee3aa6bSSatish Balay {
1088cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1089cee3aa6bSSatish Balay   int         ierr;
1090cee3aa6bSSatish Balay 
1091d64ed03dSBarry Smith   PetscFunctionBegin;
1092cee3aa6bSSatish Balay   /* do nondiagonal part */
1093cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1094cee3aa6bSSatish Balay   /* send it on its way */
1095537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1096cee3aa6bSSatish Balay   /* do local part */
1097cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1098cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1099cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1100cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1101639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11023a40ed3dSBarry Smith   PetscFunctionReturn(0);
1103cee3aa6bSSatish Balay }
1104cee3aa6bSSatish Balay 
11055615d1e5SSatish Balay #undef __FUNC__
11065615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1107ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1108cee3aa6bSSatish Balay {
1109cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1110cee3aa6bSSatish Balay   int         ierr;
1111cee3aa6bSSatish Balay 
1112d64ed03dSBarry Smith   PetscFunctionBegin;
1113cee3aa6bSSatish Balay   /* do nondiagonal part */
1114cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1115cee3aa6bSSatish Balay   /* send it on its way */
1116537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1117cee3aa6bSSatish Balay   /* do local part */
1118cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1119cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1120cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1121cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1122537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
11233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1124cee3aa6bSSatish Balay }
1125cee3aa6bSSatish Balay 
1126cee3aa6bSSatish Balay /*
1127cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1128cee3aa6bSSatish Balay    diagonal block
1129cee3aa6bSSatish Balay */
11305615d1e5SSatish Balay #undef __FUNC__
11315615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1132ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1133cee3aa6bSSatish Balay {
1134cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
11353a40ed3dSBarry Smith   int         ierr;
1136d64ed03dSBarry Smith 
1137d64ed03dSBarry Smith   PetscFunctionBegin;
1138a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
11393a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
11403a40ed3dSBarry Smith   PetscFunctionReturn(0);
1141cee3aa6bSSatish Balay }
1142cee3aa6bSSatish Balay 
11435615d1e5SSatish Balay #undef __FUNC__
11445615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1145ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1146cee3aa6bSSatish Balay {
1147cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1148cee3aa6bSSatish Balay   int         ierr;
1149d64ed03dSBarry Smith 
1150d64ed03dSBarry Smith   PetscFunctionBegin;
1151cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1152cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
11533a40ed3dSBarry Smith   PetscFunctionReturn(0);
1154cee3aa6bSSatish Balay }
1155026e39d0SSatish Balay 
11565615d1e5SSatish Balay #undef __FUNC__
11575615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1158ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
115957b952d6SSatish Balay {
116057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1161d64ed03dSBarry Smith 
1162d64ed03dSBarry Smith   PetscFunctionBegin;
1163bd7f49f5SSatish Balay   if (m) *m = mat->M;
1164bd7f49f5SSatish Balay   if (n) *n = mat->N;
11653a40ed3dSBarry Smith   PetscFunctionReturn(0);
116657b952d6SSatish Balay }
116757b952d6SSatish Balay 
11685615d1e5SSatish Balay #undef __FUNC__
11695615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1170ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
117157b952d6SSatish Balay {
117257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1173d64ed03dSBarry Smith 
1174d64ed03dSBarry Smith   PetscFunctionBegin;
117557b952d6SSatish Balay   *m = mat->m; *n = mat->N;
11763a40ed3dSBarry Smith   PetscFunctionReturn(0);
117757b952d6SSatish Balay }
117857b952d6SSatish Balay 
11795615d1e5SSatish Balay #undef __FUNC__
11805615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1181ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
118257b952d6SSatish Balay {
118357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1184d64ed03dSBarry Smith 
1185d64ed03dSBarry Smith   PetscFunctionBegin;
118657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
11873a40ed3dSBarry Smith   PetscFunctionReturn(0);
118857b952d6SSatish Balay }
118957b952d6SSatish Balay 
1190acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1191acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1192acdf5bf4SSatish Balay 
11935615d1e5SSatish Balay #undef __FUNC__
11945615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1195acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1196acdf5bf4SSatish Balay {
1197acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1198acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1199acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1200d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1201d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1202acdf5bf4SSatish Balay 
1203d64ed03dSBarry Smith   PetscFunctionBegin;
1204a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1205acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1206acdf5bf4SSatish Balay 
1207acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1208acdf5bf4SSatish Balay     /*
1209acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1210acdf5bf4SSatish Balay     */
1211acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1212bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1213bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1214acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1215acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1216acdf5bf4SSatish Balay     }
1217acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1218acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1219acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1220acdf5bf4SSatish Balay   }
1221acdf5bf4SSatish Balay 
1222a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1223d9d09a02SSatish Balay   lrow = row - brstart;
1224acdf5bf4SSatish Balay 
1225acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1226acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1227acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1228acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1229acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1230acdf5bf4SSatish Balay   nztot = nzA + nzB;
1231acdf5bf4SSatish Balay 
1232acdf5bf4SSatish Balay   cmap  = mat->garray;
1233acdf5bf4SSatish Balay   if (v  || idx) {
1234acdf5bf4SSatish Balay     if (nztot) {
1235acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1236acdf5bf4SSatish Balay       int imark = -1;
1237acdf5bf4SSatish Balay       if (v) {
1238acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1239acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1240d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1241acdf5bf4SSatish Balay           else break;
1242acdf5bf4SSatish Balay         }
1243acdf5bf4SSatish Balay         imark = i;
1244acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1245acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1246acdf5bf4SSatish Balay       }
1247acdf5bf4SSatish Balay       if (idx) {
1248acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1249acdf5bf4SSatish Balay         if (imark > -1) {
1250acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1251bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1252acdf5bf4SSatish Balay           }
1253acdf5bf4SSatish Balay         } else {
1254acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1255d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1256d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1257acdf5bf4SSatish Balay             else break;
1258acdf5bf4SSatish Balay           }
1259acdf5bf4SSatish Balay           imark = i;
1260acdf5bf4SSatish Balay         }
1261d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1262d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1263acdf5bf4SSatish Balay       }
1264d64ed03dSBarry Smith     } else {
1265d212a18eSSatish Balay       if (idx) *idx = 0;
1266d212a18eSSatish Balay       if (v)   *v   = 0;
1267d212a18eSSatish Balay     }
1268acdf5bf4SSatish Balay   }
1269acdf5bf4SSatish Balay   *nz = nztot;
1270acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1271acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
12723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1273acdf5bf4SSatish Balay }
1274acdf5bf4SSatish Balay 
12755615d1e5SSatish Balay #undef __FUNC__
12765615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1277acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1278acdf5bf4SSatish Balay {
1279acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1280d64ed03dSBarry Smith 
1281d64ed03dSBarry Smith   PetscFunctionBegin;
1282acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1283a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1284acdf5bf4SSatish Balay   }
1285acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
12863a40ed3dSBarry Smith   PetscFunctionReturn(0);
1287acdf5bf4SSatish Balay }
1288acdf5bf4SSatish Balay 
12895615d1e5SSatish Balay #undef __FUNC__
12905615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1291ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
12925a838052SSatish Balay {
12935a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1294d64ed03dSBarry Smith 
1295d64ed03dSBarry Smith   PetscFunctionBegin;
12965a838052SSatish Balay   *bs = baij->bs;
12973a40ed3dSBarry Smith   PetscFunctionReturn(0);
12985a838052SSatish Balay }
12995a838052SSatish Balay 
13005615d1e5SSatish Balay #undef __FUNC__
13015615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1302ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
130358667388SSatish Balay {
130458667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
130558667388SSatish Balay   int         ierr;
1306d64ed03dSBarry Smith 
1307d64ed03dSBarry Smith   PetscFunctionBegin;
130858667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
130958667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
13103a40ed3dSBarry Smith   PetscFunctionReturn(0);
131158667388SSatish Balay }
13120ac07820SSatish Balay 
13135615d1e5SSatish Balay #undef __FUNC__
13145615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1315ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13160ac07820SSatish Balay {
13174e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13184e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13197d57db60SLois Curfman McInnes   int         ierr;
13207d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13210ac07820SSatish Balay 
1322d64ed03dSBarry Smith   PetscFunctionBegin;
13234e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13244e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
13254e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
13264e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
13274e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
13280ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13294e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13304e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13314e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
13324e220ebcSLois Curfman McInnes     info->memory       = isend[3];
13334e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
13340ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1335ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
13364e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13374e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13384e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13394e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13404e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13410ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1342ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
13434e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13444e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13454e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13464e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13474e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13480ac07820SSatish Balay   }
13495a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
13505a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
13515a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
13525a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
13534e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
13544e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
13554e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
13563a40ed3dSBarry Smith   PetscFunctionReturn(0);
13570ac07820SSatish Balay }
13580ac07820SSatish Balay 
13595615d1e5SSatish Balay #undef __FUNC__
13605615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1361ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
136258667388SSatish Balay {
136358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
136458667388SSatish Balay 
1365d64ed03dSBarry Smith   PetscFunctionBegin;
136658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
136758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
13686da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1369c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
137096854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1371c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1372b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1373b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1374b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1375aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
137658667388SSatish Balay         MatSetOption(a->A,op);
137758667388SSatish Balay         MatSetOption(a->B,op);
1378b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
13796da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
138058667388SSatish Balay              op == MAT_SYMMETRIC ||
138158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
138258667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
138358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
138458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
138558667388SSatish Balay     a->roworiented = 0;
138658667388SSatish Balay     MatSetOption(a->A,op);
138758667388SSatish Balay     MatSetOption(a->B,op);
13882b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
138990f02eecSBarry Smith     a->donotstash = 1;
1390d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1391d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1392d64ed03dSBarry Smith   } else {
1393d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1394d64ed03dSBarry Smith   }
13953a40ed3dSBarry Smith   PetscFunctionReturn(0);
139658667388SSatish Balay }
139758667388SSatish Balay 
13985615d1e5SSatish Balay #undef __FUNC__
13995615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1400ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
14010ac07820SSatish Balay {
14020ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
14030ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14040ac07820SSatish Balay   Mat        B;
14050ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
14060ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
14070ac07820SSatish Balay   Scalar     *a;
14080ac07820SSatish Balay 
1409d64ed03dSBarry Smith   PetscFunctionBegin;
1410a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
14110ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14120ac07820SSatish Balay   CHKERRQ(ierr);
14130ac07820SSatish Balay 
14140ac07820SSatish Balay   /* copy over the A part */
14150ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14160ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14170ac07820SSatish Balay   row = baij->rstart;
14180ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
14190ac07820SSatish Balay 
14200ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14210ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14220ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14230ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14240ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14250ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14260ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14270ac07820SSatish Balay         col++; a += bs;
14280ac07820SSatish Balay       }
14290ac07820SSatish Balay     }
14300ac07820SSatish Balay   }
14310ac07820SSatish Balay   /* copy over the B part */
14320ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
14330ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14340ac07820SSatish Balay   row = baij->rstart*bs;
14350ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14360ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14370ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14380ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14390ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
14400ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14410ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14420ac07820SSatish Balay         col++; a += bs;
14430ac07820SSatish Balay       }
14440ac07820SSatish Balay     }
14450ac07820SSatish Balay   }
14460ac07820SSatish Balay   PetscFree(rvals);
14470ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14480ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14490ac07820SSatish Balay 
14500ac07820SSatish Balay   if (matout != PETSC_NULL) {
14510ac07820SSatish Balay     *matout = B;
14520ac07820SSatish Balay   } else {
14530ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
14540ac07820SSatish Balay     PetscFree(baij->rowners);
14550ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
14560ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
14570ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
14580ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
14590ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
14600ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
14610ac07820SSatish Balay     PetscFree(baij);
1462f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
14630ac07820SSatish Balay     PetscHeaderDestroy(B);
14640ac07820SSatish Balay   }
14653a40ed3dSBarry Smith   PetscFunctionReturn(0);
14660ac07820SSatish Balay }
14670e95ebc0SSatish Balay 
14685615d1e5SSatish Balay #undef __FUNC__
14695615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
14700e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
14710e95ebc0SSatish Balay {
14720e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
14730e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
14740e95ebc0SSatish Balay   int ierr,s1,s2,s3;
14750e95ebc0SSatish Balay 
1476d64ed03dSBarry Smith   PetscFunctionBegin;
14770e95ebc0SSatish Balay   if (ll)  {
14780e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
14790e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1480a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
14810e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
14820e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
14830e95ebc0SSatish Balay   }
1484a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
14853a40ed3dSBarry Smith   PetscFunctionReturn(0);
14860e95ebc0SSatish Balay }
14870e95ebc0SSatish Balay 
14885615d1e5SSatish Balay #undef __FUNC__
14895615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1490ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
14910ac07820SSatish Balay {
14920ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
14930ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1494a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
14950ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
14960ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1497a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
14980ac07820SSatish Balay   MPI_Comm       comm = A->comm;
14990ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
15000ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
15010ac07820SSatish Balay   IS             istmp;
15020ac07820SSatish Balay 
1503d64ed03dSBarry Smith   PetscFunctionBegin;
15040ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
15050ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
15060ac07820SSatish Balay 
15070ac07820SSatish Balay   /*  first count number of contributors to each processor */
15080ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
15090ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
15100ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
15110ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15120ac07820SSatish Balay     idx = rows[i];
15130ac07820SSatish Balay     found = 0;
15140ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
15150ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
15160ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
15170ac07820SSatish Balay       }
15180ac07820SSatish Balay     }
1519a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
15200ac07820SSatish Balay   }
15210ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
15220ac07820SSatish Balay 
15230ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
15240ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1525ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
15260ac07820SSatish Balay   nrecvs = work[rank];
1527ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
15280ac07820SSatish Balay   nmax   = work[rank];
15290ac07820SSatish Balay   PetscFree(work);
15300ac07820SSatish Balay 
15310ac07820SSatish Balay   /* post receives:   */
1532d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1533d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
15340ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1535ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
15360ac07820SSatish Balay   }
15370ac07820SSatish Balay 
15380ac07820SSatish Balay   /* do sends:
15390ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
15400ac07820SSatish Balay      the ith processor
15410ac07820SSatish Balay   */
15420ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1543ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
15440ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
15450ac07820SSatish Balay   starts[0] = 0;
15460ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15470ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15480ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
15490ac07820SSatish Balay   }
15500ac07820SSatish Balay   ISRestoreIndices(is,&rows);
15510ac07820SSatish Balay 
15520ac07820SSatish Balay   starts[0] = 0;
15530ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15540ac07820SSatish Balay   count = 0;
15550ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
15560ac07820SSatish Balay     if (procs[i]) {
1557ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
15580ac07820SSatish Balay     }
15590ac07820SSatish Balay   }
15600ac07820SSatish Balay   PetscFree(starts);
15610ac07820SSatish Balay 
15620ac07820SSatish Balay   base = owners[rank]*bs;
15630ac07820SSatish Balay 
15640ac07820SSatish Balay   /*  wait on receives */
15650ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
15660ac07820SSatish Balay   source = lens + nrecvs;
15670ac07820SSatish Balay   count  = nrecvs; slen = 0;
15680ac07820SSatish Balay   while (count) {
1569ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
15700ac07820SSatish Balay     /* unpack receives into our local space */
1571ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
15720ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
15730ac07820SSatish Balay     lens[imdex]  = n;
15740ac07820SSatish Balay     slen += n;
15750ac07820SSatish Balay     count--;
15760ac07820SSatish Balay   }
15770ac07820SSatish Balay   PetscFree(recv_waits);
15780ac07820SSatish Balay 
15790ac07820SSatish Balay   /* move the data into the send scatter */
15800ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
15810ac07820SSatish Balay   count = 0;
15820ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
15830ac07820SSatish Balay     values = rvalues + i*nmax;
15840ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
15850ac07820SSatish Balay       lrows[count++] = values[j] - base;
15860ac07820SSatish Balay     }
15870ac07820SSatish Balay   }
15880ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
15890ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
15900ac07820SSatish Balay 
15910ac07820SSatish Balay   /* actually zap the local rows */
1592029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
15930ac07820SSatish Balay   PLogObjectParent(A,istmp);
1594a07cd24cSSatish Balay 
1595a07cd24cSSatish Balay   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
15960ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
15970ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
15980ac07820SSatish Balay 
1599a07cd24cSSatish Balay   if (diag) {
1600a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1601a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1602a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1603a07cd24cSSatish Balay     }
1604a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1605a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1606a07cd24cSSatish Balay   }
1607a07cd24cSSatish Balay   PetscFree(lrows);
1608a07cd24cSSatish Balay 
16090ac07820SSatish Balay   /* wait on sends */
16100ac07820SSatish Balay   if (nsends) {
1611d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1612ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
16130ac07820SSatish Balay     PetscFree(send_status);
16140ac07820SSatish Balay   }
16150ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
16160ac07820SSatish Balay 
16173a40ed3dSBarry Smith   PetscFunctionReturn(0);
16180ac07820SSatish Balay }
1619ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
16205615d1e5SSatish Balay #undef __FUNC__
16215615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1622ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1623ba4ca20aSSatish Balay {
1624ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
16253a40ed3dSBarry Smith   int         ierr;
1626ba4ca20aSSatish Balay 
1627d64ed03dSBarry Smith   PetscFunctionBegin;
16283a40ed3dSBarry Smith   if (!a->rank) {
16293a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
16303a40ed3dSBarry Smith   }
16313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1632ba4ca20aSSatish Balay }
16330ac07820SSatish Balay 
16345615d1e5SSatish Balay #undef __FUNC__
16355615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1636ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1637bb5a7306SBarry Smith {
1638bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1639bb5a7306SBarry Smith   int         ierr;
1640d64ed03dSBarry Smith 
1641d64ed03dSBarry Smith   PetscFunctionBegin;
1642bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
16433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1644bb5a7306SBarry Smith }
1645bb5a7306SBarry Smith 
16460ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
16470ac07820SSatish Balay 
164879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
164979bdfe76SSatish Balay static struct _MatOps MatOps = {
1650bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
16514c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
16524c50302cSBarry Smith   0,0,0,0,
16530ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
16540e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
165558667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
16564c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
16574c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
16584c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
165994a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1660d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1661ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1662bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1663ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
166479bdfe76SSatish Balay 
166579bdfe76SSatish Balay 
16665615d1e5SSatish Balay #undef __FUNC__
16675615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
166879bdfe76SSatish Balay /*@C
166979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
167079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
167179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
167279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
167379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
167479bdfe76SSatish Balay 
167579bdfe76SSatish Balay    Input Parameters:
167679bdfe76SSatish Balay .  comm - MPI communicator
167779bdfe76SSatish Balay .  bs   - size of blockk
167879bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
167992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
168092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
168192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
168292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
168392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
168479bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
168592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
168679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
168779bdfe76SSatish Balay            submatrix  (same for all local rows)
168892e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
168992e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
169092e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
169192e8d321SLois Curfman McInnes            it is zero.
169292e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
169379bdfe76SSatish Balay            submatrix (same for all local rows).
169492e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
169592e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
169692e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
169779bdfe76SSatish Balay 
169879bdfe76SSatish Balay    Output Parameter:
169979bdfe76SSatish Balay .  A - the matrix
170079bdfe76SSatish Balay 
170179bdfe76SSatish Balay    Notes:
170279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
170379bdfe76SSatish Balay    (possibly both).
170479bdfe76SSatish Balay 
170579bdfe76SSatish Balay    Storage Information:
170679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
170779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
170879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
170979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
171079bdfe76SSatish Balay 
171179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
171279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
171379bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
171479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
171579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
171679bdfe76SSatish Balay 
171779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
171879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
171979bdfe76SSatish Balay 
172079bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
172179bdfe76SSatish Balay $         -------------------
172279bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
172379bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
172479bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
172579bdfe76SSatish Balay $         -------------------
172679bdfe76SSatish Balay $
172779bdfe76SSatish Balay 
172879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
172979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
173079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
173157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
173279bdfe76SSatish Balay 
1733d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1734d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
173579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
173692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
173792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
17386da5968aSLois Curfman McInnes    matrices.
173979bdfe76SSatish Balay 
174092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
174179bdfe76SSatish Balay 
174279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
174379bdfe76SSatish Balay @*/
174479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
174579bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
174679bdfe76SSatish Balay {
174779bdfe76SSatish Balay   Mat          B;
174879bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
17494c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
175079bdfe76SSatish Balay 
1751d64ed03dSBarry Smith   PetscFunctionBegin;
1752a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
17533914022bSBarry Smith 
17543914022bSBarry Smith   MPI_Comm_size(comm,&size);
17553914022bSBarry Smith   if (size == 1) {
17563914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
17573914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
17583914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
17593a40ed3dSBarry Smith     PetscFunctionReturn(0);
17603914022bSBarry Smith   }
17613914022bSBarry Smith 
176279bdfe76SSatish Balay   *A = 0;
1763d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
176479bdfe76SSatish Balay   PLogObjectCreate(B);
176579bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
176679bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
176779bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
17684c50302cSBarry Smith 
176979bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
177079bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
177190f02eecSBarry Smith   B->mapping    = 0;
177279bdfe76SSatish Balay   B->factor     = 0;
177379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
177479bdfe76SSatish Balay 
1775e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
177679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
177779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
177879bdfe76SSatish Balay 
1779d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1780a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1781d64ed03dSBarry Smith   }
1782a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1783a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1784a8c6a408SBarry Smith   }
1785a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1786a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1787a8c6a408SBarry Smith   }
1788cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1789cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
179079bdfe76SSatish Balay 
179179bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
179279bdfe76SSatish Balay     work[0] = m; work[1] = n;
179379bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
1794ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
179579bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
179679bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
179779bdfe76SSatish Balay   }
179879bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
179979bdfe76SSatish Balay     Mbs = M/bs;
1800a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
180179bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
180279bdfe76SSatish Balay     m   = mbs*bs;
180379bdfe76SSatish Balay   }
180479bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
180579bdfe76SSatish Balay     Nbs = N/bs;
1806a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
180779bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
180879bdfe76SSatish Balay     n   = nbs*bs;
180979bdfe76SSatish Balay   }
1810a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
1811a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1812a8c6a408SBarry Smith   }
181379bdfe76SSatish Balay 
181479bdfe76SSatish Balay   b->m = m; B->m = m;
181579bdfe76SSatish Balay   b->n = n; B->n = n;
181679bdfe76SSatish Balay   b->N = N; B->N = N;
181779bdfe76SSatish Balay   b->M = M; B->M = M;
181879bdfe76SSatish Balay   b->bs  = bs;
181979bdfe76SSatish Balay   b->bs2 = bs*bs;
182079bdfe76SSatish Balay   b->mbs = mbs;
182179bdfe76SSatish Balay   b->nbs = nbs;
182279bdfe76SSatish Balay   b->Mbs = Mbs;
182379bdfe76SSatish Balay   b->Nbs = Nbs;
182479bdfe76SSatish Balay 
182579bdfe76SSatish Balay   /* build local table of row and column ownerships */
182679bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1827f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
18280ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
1829ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
183079bdfe76SSatish Balay   b->rowners[0] = 0;
183179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
183279bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
183379bdfe76SSatish Balay   }
183479bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
183579bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
18364fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
18374fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
18384fa0d573SSatish Balay 
1839ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
184079bdfe76SSatish Balay   b->cowners[0] = 0;
184179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
184279bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
184379bdfe76SSatish Balay   }
184479bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
184579bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
18464fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
18474fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
184879bdfe76SSatish Balay 
1849a07cd24cSSatish Balay 
185079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1851029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
185279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
185379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1854029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
185579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
185679bdfe76SSatish Balay 
185779bdfe76SSatish Balay   /* build cache for off array entries formed */
185879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
185990f02eecSBarry Smith   b->donotstash  = 0;
186079bdfe76SSatish Balay   b->colmap      = 0;
186179bdfe76SSatish Balay   b->garray      = 0;
186279bdfe76SSatish Balay   b->roworiented = 1;
186379bdfe76SSatish Balay 
186430793edcSSatish Balay   /* stuff used in block assembly */
186530793edcSSatish Balay   b->barray       = 0;
186630793edcSSatish Balay 
186779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
186879bdfe76SSatish Balay   b->lvec         = 0;
186979bdfe76SSatish Balay   b->Mvctx        = 0;
187079bdfe76SSatish Balay 
187179bdfe76SSatish Balay   /* stuff for MatGetRow() */
187279bdfe76SSatish Balay   b->rowindices   = 0;
187379bdfe76SSatish Balay   b->rowvalues    = 0;
187479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
187579bdfe76SSatish Balay 
1876a07cd24cSSatish Balay   /* hash table stuff */
1877a07cd24cSSatish Balay   b->ht          = 0;
18780bdbc534SSatish Balay   b->ht_size     = 0;
1879a07cd24cSSatish Balay 
188079bdfe76SSatish Balay   *A = B;
18813a40ed3dSBarry Smith   PetscFunctionReturn(0);
188279bdfe76SSatish Balay }
1883026e39d0SSatish Balay 
18845615d1e5SSatish Balay #undef __FUNC__
18855615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
18860ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
18870ac07820SSatish Balay {
18880ac07820SSatish Balay   Mat         mat;
18890ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
18900ac07820SSatish Balay   int         ierr, len=0, flg;
18910ac07820SSatish Balay 
1892d64ed03dSBarry Smith   PetscFunctionBegin;
18930ac07820SSatish Balay   *newmat       = 0;
1894d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
18950ac07820SSatish Balay   PLogObjectCreate(mat);
18960ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
18970ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
18980ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
18990ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
19000ac07820SSatish Balay   mat->factor     = matin->factor;
19010ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
19020ac07820SSatish Balay 
19030ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
19040ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
19050ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
19060ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
19070ac07820SSatish Balay 
19080ac07820SSatish Balay   a->bs  = oldmat->bs;
19090ac07820SSatish Balay   a->bs2 = oldmat->bs2;
19100ac07820SSatish Balay   a->mbs = oldmat->mbs;
19110ac07820SSatish Balay   a->nbs = oldmat->nbs;
19120ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
19130ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
19140ac07820SSatish Balay 
19150ac07820SSatish Balay   a->rstart       = oldmat->rstart;
19160ac07820SSatish Balay   a->rend         = oldmat->rend;
19170ac07820SSatish Balay   a->cstart       = oldmat->cstart;
19180ac07820SSatish Balay   a->cend         = oldmat->cend;
19190ac07820SSatish Balay   a->size         = oldmat->size;
19200ac07820SSatish Balay   a->rank         = oldmat->rank;
1921e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
19220ac07820SSatish Balay   a->rowvalues    = 0;
19230ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
192430793edcSSatish Balay   a->barray       = 0;
19250ac07820SSatish Balay 
19260ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1927f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
19280ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
19290ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
19300ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
19310ac07820SSatish Balay   if (oldmat->colmap) {
19320ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
19330ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
19340ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
19350ac07820SSatish Balay   } else a->colmap = 0;
19360ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
19370ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
19380ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
19390ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
19400ac07820SSatish Balay   } else a->garray = 0;
19410ac07820SSatish Balay 
19420ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
19430ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
19440ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
19450ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
19460ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
19470ac07820SSatish Balay   PLogObjectParent(mat,a->A);
19480ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
19490ac07820SSatish Balay   PLogObjectParent(mat,a->B);
19500ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
19510ac07820SSatish Balay   if (flg) {
19520ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
19530ac07820SSatish Balay   }
19540ac07820SSatish Balay   *newmat = mat;
19553a40ed3dSBarry Smith   PetscFunctionReturn(0);
19560ac07820SSatish Balay }
195757b952d6SSatish Balay 
195857b952d6SSatish Balay #include "sys.h"
195957b952d6SSatish Balay 
19605615d1e5SSatish Balay #undef __FUNC__
19615615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
196257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
196357b952d6SSatish Balay {
196457b952d6SSatish Balay   Mat          A;
196557b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
196657b952d6SSatish Balay   Scalar       *vals,*buf;
196757b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
196857b952d6SSatish Balay   MPI_Status   status;
1969cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
197057b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
197157b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
197257b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
197357b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
197457b952d6SSatish Balay 
1975d64ed03dSBarry Smith   PetscFunctionBegin;
197657b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
197757b952d6SSatish Balay   bs2  = bs*bs;
197857b952d6SSatish Balay 
197957b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
198057b952d6SSatish Balay   if (!rank) {
198157b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1982e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
1983a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1984d64ed03dSBarry Smith     if (header[3] < 0) {
1985a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
1986d64ed03dSBarry Smith     }
19876c5fab8fSBarry Smith   }
1988d64ed03dSBarry Smith 
1989ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
199057b952d6SSatish Balay   M = header[1]; N = header[2];
199157b952d6SSatish Balay 
1992a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
199357b952d6SSatish Balay 
199457b952d6SSatish Balay   /*
199557b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
199657b952d6SSatish Balay      divisible by the blocksize
199757b952d6SSatish Balay   */
199857b952d6SSatish Balay   Mbs        = M/bs;
199957b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
200057b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
200157b952d6SSatish Balay   else                  Mbs++;
200257b952d6SSatish Balay   if (extra_rows &&!rank) {
2003b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
200457b952d6SSatish Balay   }
2005537820f0SBarry Smith 
200657b952d6SSatish Balay   /* determine ownership of all rows */
200757b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
200857b952d6SSatish Balay   m   = mbs * bs;
2009cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2010cee3aa6bSSatish Balay   browners = rowners + size + 1;
2011ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
201257b952d6SSatish Balay   rowners[0] = 0;
2013cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2014cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
201557b952d6SSatish Balay   rstart = rowners[rank];
201657b952d6SSatish Balay   rend   = rowners[rank+1];
201757b952d6SSatish Balay 
201857b952d6SSatish Balay   /* distribute row lengths to all processors */
201957b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
202057b952d6SSatish Balay   if (!rank) {
202157b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2022e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
202357b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
202457b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2025cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2026ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
202757b952d6SSatish Balay     PetscFree(sndcounts);
2028d64ed03dSBarry Smith   } else {
2029ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
203057b952d6SSatish Balay   }
203157b952d6SSatish Balay 
203257b952d6SSatish Balay   if (!rank) {
203357b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
203457b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
203557b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
203657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
203757b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
203857b952d6SSatish Balay         procsnz[i] += rowlengths[j];
203957b952d6SSatish Balay       }
204057b952d6SSatish Balay     }
204157b952d6SSatish Balay     PetscFree(rowlengths);
204257b952d6SSatish Balay 
204357b952d6SSatish Balay     /* determine max buffer needed and allocate it */
204457b952d6SSatish Balay     maxnz = 0;
204557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
204657b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
204757b952d6SSatish Balay     }
204857b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
204957b952d6SSatish Balay 
205057b952d6SSatish Balay     /* read in my part of the matrix column indices  */
205157b952d6SSatish Balay     nz = procsnz[0];
205257b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
205357b952d6SSatish Balay     mycols = ibuf;
2054cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2055e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2056cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2057cee3aa6bSSatish Balay 
205857b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
205957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
206057b952d6SSatish Balay       nz   = procsnz[i];
2061e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2062ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
206357b952d6SSatish Balay     }
206457b952d6SSatish Balay     /* read in the stuff for the last proc */
206557b952d6SSatish Balay     if ( size != 1 ) {
206657b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2067e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
206857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2069ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
207057b952d6SSatish Balay     }
207157b952d6SSatish Balay     PetscFree(cols);
2072d64ed03dSBarry Smith   } else {
207357b952d6SSatish Balay     /* determine buffer space needed for message */
207457b952d6SSatish Balay     nz = 0;
207557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
207657b952d6SSatish Balay       nz += locrowlens[i];
207757b952d6SSatish Balay     }
207857b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
207957b952d6SSatish Balay     mycols = ibuf;
208057b952d6SSatish Balay     /* receive message of column indices*/
2081ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2082ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2083a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
208457b952d6SSatish Balay   }
208557b952d6SSatish Balay 
208657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2087cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2088cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
208957b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2090cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
209157b952d6SSatish Balay   masked1 = mask    + Mbs;
209257b952d6SSatish Balay   masked2 = masked1 + Mbs;
209357b952d6SSatish Balay   rowcount = 0; nzcount = 0;
209457b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
209557b952d6SSatish Balay     dcount  = 0;
209657b952d6SSatish Balay     odcount = 0;
209757b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
209857b952d6SSatish Balay       kmax = locrowlens[rowcount];
209957b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
210057b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
210157b952d6SSatish Balay         if (!mask[tmp]) {
210257b952d6SSatish Balay           mask[tmp] = 1;
210357b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
210457b952d6SSatish Balay           else masked1[dcount++] = tmp;
210557b952d6SSatish Balay         }
210657b952d6SSatish Balay       }
210757b952d6SSatish Balay       rowcount++;
210857b952d6SSatish Balay     }
2109cee3aa6bSSatish Balay 
211057b952d6SSatish Balay     dlens[i]  = dcount;
211157b952d6SSatish Balay     odlens[i] = odcount;
2112cee3aa6bSSatish Balay 
211357b952d6SSatish Balay     /* zero out the mask elements we set */
211457b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
211557b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
211657b952d6SSatish Balay   }
2117cee3aa6bSSatish Balay 
211857b952d6SSatish Balay   /* create our matrix */
2119537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2120537820f0SBarry Smith          CHKERRQ(ierr);
212157b952d6SSatish Balay   A = *newmat;
21226d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
212357b952d6SSatish Balay 
212457b952d6SSatish Balay   if (!rank) {
212557b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
212657b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
212757b952d6SSatish Balay     nz = procsnz[0];
212857b952d6SSatish Balay     vals = buf;
2129cee3aa6bSSatish Balay     mycols = ibuf;
2130cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2131e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2132cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2133537820f0SBarry Smith 
213457b952d6SSatish Balay     /* insert into matrix */
213557b952d6SSatish Balay     jj      = rstart*bs;
213657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
213757b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
213857b952d6SSatish Balay       mycols += locrowlens[i];
213957b952d6SSatish Balay       vals   += locrowlens[i];
214057b952d6SSatish Balay       jj++;
214157b952d6SSatish Balay     }
214257b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
214357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
214457b952d6SSatish Balay       nz   = procsnz[i];
214557b952d6SSatish Balay       vals = buf;
2146e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2147ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
214857b952d6SSatish Balay     }
214957b952d6SSatish Balay     /* the last proc */
215057b952d6SSatish Balay     if ( size != 1 ){
215157b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2152cee3aa6bSSatish Balay       vals = buf;
2153e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
215457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2155ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
215657b952d6SSatish Balay     }
215757b952d6SSatish Balay     PetscFree(procsnz);
2158d64ed03dSBarry Smith   } else {
215957b952d6SSatish Balay     /* receive numeric values */
216057b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
216157b952d6SSatish Balay 
216257b952d6SSatish Balay     /* receive message of values*/
216357b952d6SSatish Balay     vals   = buf;
2164cee3aa6bSSatish Balay     mycols = ibuf;
2165ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2166ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2167a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
216857b952d6SSatish Balay 
216957b952d6SSatish Balay     /* insert into matrix */
217057b952d6SSatish Balay     jj      = rstart*bs;
2171cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
217257b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
217357b952d6SSatish Balay       mycols += locrowlens[i];
217457b952d6SSatish Balay       vals   += locrowlens[i];
217557b952d6SSatish Balay       jj++;
217657b952d6SSatish Balay     }
217757b952d6SSatish Balay   }
217857b952d6SSatish Balay   PetscFree(locrowlens);
217957b952d6SSatish Balay   PetscFree(buf);
218057b952d6SSatish Balay   PetscFree(ibuf);
218157b952d6SSatish Balay   PetscFree(rowners);
218257b952d6SSatish Balay   PetscFree(dlens);
2183cee3aa6bSSatish Balay   PetscFree(mask);
21846d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
21856d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
21863a40ed3dSBarry Smith   PetscFunctionReturn(0);
218757b952d6SSatish Balay }
218857b952d6SSatish Balay 
218957b952d6SSatish Balay 
2190