xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c276075480160a5c038543a369203006b684a72e)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*c2760754SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.99 1998/01/13 00:07:02 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
143b2fbd54SBarry Smith 
15537820f0SBarry Smith /*
16537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1957b952d6SSatish Balay    length of colmap equals the global matrix length.
2057b952d6SSatish Balay */
215615d1e5SSatish Balay #undef __FUNC__
225615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2457b952d6SSatish Balay {
2557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2857b952d6SSatish Balay 
29d64ed03dSBarry Smith   PetscFunctionBegin;
30758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3257b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
343a40ed3dSBarry Smith   PetscFunctionReturn(0);
3557b952d6SSatish Balay }
3657b952d6SSatish Balay 
3780c1aa95SSatish Balay #define CHUNKSIZE  10
3880c1aa95SSatish Balay 
39f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4080c1aa95SSatish Balay { \
4180c1aa95SSatish Balay  \
4280c1aa95SSatish Balay     brow = row/bs;  \
4380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4580c1aa95SSatish Balay       bcol = col/bs; \
4680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
47ab26458aSBarry Smith       low = 0; high = nrow; \
48ab26458aSBarry Smith       while (high-low > 3) { \
49ab26458aSBarry Smith         t = (low+high)/2; \
50ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
51ab26458aSBarry Smith         else              low  = t; \
52ab26458aSBarry Smith       } \
53ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
58eada6651SSatish Balay           else                    *bap  = value;  \
59ac7a638eSSatish Balay           goto a_noinsert; \
6080c1aa95SSatish Balay         } \
6180c1aa95SSatish Balay       } \
6289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
63a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
6480c1aa95SSatish Balay       if (nrow >= rmax) { \
6580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6680c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6780c1aa95SSatish Balay         Scalar *new_a; \
6880c1aa95SSatish Balay  \
69a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7089280ab3SLois Curfman McInnes  \
7180c1aa95SSatish Balay         /* malloc new storage space */ \
7280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7680c1aa95SSatish Balay  \
7780c1aa95SSatish Balay         /* copy over old data into new slots */ \
7880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8380c1aa95SSatish Balay                                                            len*sizeof(int)); \
8480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8880c1aa95SSatish Balay         /* free up old matrix storage */ \
8980c1aa95SSatish Balay         PetscFree(a->a);  \
9080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9280c1aa95SSatish Balay         a->singlemalloc = 1; \
9380c1aa95SSatish Balay  \
9480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9880c1aa95SSatish Balay         a->reallocs++; \
9980c1aa95SSatish Balay         a->nz++; \
10080c1aa95SSatish Balay       } \
10180c1aa95SSatish Balay       N = nrow++ - 1;  \
10280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
110ac7a638eSSatish Balay       a_noinsert:; \
11180c1aa95SSatish Balay     ailen[brow] = nrow; \
11280c1aa95SSatish Balay }
11357b952d6SSatish Balay 
114ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115ac7a638eSSatish Balay { \
116ac7a638eSSatish Balay  \
117ac7a638eSSatish Balay     brow = row/bs;  \
118ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
120ac7a638eSSatish Balay       bcol = col/bs; \
121ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
122ac7a638eSSatish Balay       low = 0; high = nrow; \
123ac7a638eSSatish Balay       while (high-low > 3) { \
124ac7a638eSSatish Balay         t = (low+high)/2; \
125ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
126ac7a638eSSatish Balay         else              low  = t; \
127ac7a638eSSatish Balay       } \
128ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
129ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
130ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
131ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
133ac7a638eSSatish Balay           else                    *bap  = value;  \
134ac7a638eSSatish Balay           goto b_noinsert; \
135ac7a638eSSatish Balay         } \
136ac7a638eSSatish Balay       } \
13789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
138a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139ac7a638eSSatish Balay       if (nrow >= rmax) { \
140ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14174c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142ac7a638eSSatish Balay         Scalar *new_a; \
143ac7a638eSSatish Balay  \
144a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
14589280ab3SLois Curfman McInnes  \
146ac7a638eSSatish Balay         /* malloc new storage space */ \
14774c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
150ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
151ac7a638eSSatish Balay  \
152ac7a638eSSatish Balay         /* copy over old data into new slots */ \
153ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15474c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158ac7a638eSSatish Balay                                                            len*sizeof(int)); \
159ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163ac7a638eSSatish Balay         /* free up old matrix storage */ \
16474c639caSSatish Balay         PetscFree(b->a);  \
16574c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16774c639caSSatish Balay         b->singlemalloc = 1; \
168ac7a638eSSatish Balay  \
169ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17174c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17374c639caSSatish Balay         b->reallocs++; \
17474c639caSSatish Balay         b->nz++; \
175ac7a638eSSatish Balay       } \
176ac7a638eSSatish Balay       N = nrow++ - 1;  \
177ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
178ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
179ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
180ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
184ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
185ac7a638eSSatish Balay       b_noinsert:; \
186ac7a638eSSatish Balay     bilen[brow] = nrow; \
187ac7a638eSSatish Balay }
188ac7a638eSSatish Balay 
1895615d1e5SSatish Balay #undef __FUNC__
1905615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
191ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19257b952d6SSatish Balay {
19357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19457b952d6SSatish Balay   Scalar      value;
1954fa0d573SSatish Balay   int         ierr,i,j,row,col;
1964fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1974fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1984fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19957b952d6SSatish Balay 
200eada6651SSatish Balay   /* Some Variables required in the macro */
20180c1aa95SSatish Balay   Mat         A = baij->A;
20280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204ac7a638eSSatish Balay   Scalar      *aa=a->a;
205ac7a638eSSatish Balay 
206ac7a638eSSatish Balay   Mat         B = baij->B;
207ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209ac7a638eSSatish Balay   Scalar      *ba=b->a;
210ac7a638eSSatish Balay 
211ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
213ac7a638eSSatish Balay   Scalar      *ap,*bap;
21480c1aa95SSatish Balay 
215d64ed03dSBarry Smith   PetscFunctionBegin;
21657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2173a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
218a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220639f9d9dSBarry Smith #endif
22157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22257b952d6SSatish Balay       row = im[i] - rstart_orig;
22357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22557b952d6SSatish Balay           col = in[j] - cstart_orig;
22657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22957b952d6SSatish Balay         }
2303a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
231a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233639f9d9dSBarry Smith #endif
23457b952d6SSatish Balay         else {
23557b952d6SSatish Balay           if (mat->was_assembled) {
236905e6a2fSBarry Smith             if (!baij->colmap) {
237905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238905e6a2fSBarry Smith             }
239905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
24057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24257b952d6SSatish Balay               col =  in[j];
2439bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2449bf004c3SSatish Balay               B = baij->B;
2459bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2469bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2479bf004c3SSatish Balay               ba=b->a;
24857b952d6SSatish Balay             }
249d64ed03dSBarry Smith           } else col = in[j];
25057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25357b952d6SSatish Balay         }
25457b952d6SSatish Balay       }
255d64ed03dSBarry Smith     } else {
25690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258d64ed03dSBarry Smith       } else {
25990f02eecSBarry Smith         if (!baij->donotstash) {
26057b952d6SSatish Balay           row = im[i];
26157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26357b952d6SSatish Balay           }
26457b952d6SSatish Balay         }
26557b952d6SSatish Balay       }
26657b952d6SSatish Balay     }
26790f02eecSBarry Smith   }
2683a40ed3dSBarry Smith   PetscFunctionReturn(0);
26957b952d6SSatish Balay }
27057b952d6SSatish Balay 
271ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272ab26458aSBarry Smith #undef __FUNC__
273ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275ab26458aSBarry Smith {
276ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
278abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
279ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
281ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282ab26458aSBarry Smith 
28330793edcSSatish Balay   if(!barray) {
28447513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28530793edcSSatish Balay   }
28630793edcSSatish Balay 
287ab26458aSBarry Smith   if (roworiented) {
288ab26458aSBarry Smith     stepval = (n-1)*bs;
289ab26458aSBarry Smith   } else {
290ab26458aSBarry Smith     stepval = (m-1)*bs;
291ab26458aSBarry Smith   }
292ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
2933a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
294a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296ab26458aSBarry Smith #endif
297ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
298ab26458aSBarry Smith       row = im[i] - rstart;
299ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
30015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30215b57d14SSatish Balay           barray = v + i*bs2;
30315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30415b57d14SSatish Balay           barray = v + j*bs2;
30515b57d14SSatish Balay         } else { /* Here a copy is required */
306ab26458aSBarry Smith           if (roworiented) {
307ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
308ab26458aSBarry Smith           } else {
309ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
310abef11f7SSatish Balay           }
31147513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31247513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31330793edcSSatish Balay               *barray++  = *value++;
31447513183SBarry Smith             }
31547513183SBarry Smith           }
31630793edcSSatish Balay           barray -=bs2;
31715b57d14SSatish Balay         }
318abef11f7SSatish Balay 
319abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
320abef11f7SSatish Balay           col  = in[j] - cstart;
32130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322ab26458aSBarry Smith         }
3233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
324a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326ab26458aSBarry Smith #endif
327ab26458aSBarry Smith         else {
328ab26458aSBarry Smith           if (mat->was_assembled) {
329ab26458aSBarry Smith             if (!baij->colmap) {
330ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331ab26458aSBarry Smith             }
332a5eb4965SSatish Balay 
3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
334a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335a5eb4965SSatish Balay #endif
336a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
337ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339ab26458aSBarry Smith               col =  in[j];
340ab26458aSBarry Smith             }
341ab26458aSBarry Smith           }
342ab26458aSBarry Smith           else col = in[j];
34330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344ab26458aSBarry Smith         }
345ab26458aSBarry Smith       }
346d64ed03dSBarry Smith     } else {
347ab26458aSBarry Smith       if (!baij->donotstash) {
348ab26458aSBarry Smith         if (roworiented ) {
349abef11f7SSatish Balay           row   = im[i]*bs;
350abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
351abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
352abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
353abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355abef11f7SSatish Balay               }
356ab26458aSBarry Smith             }
357ab26458aSBarry Smith           }
358d64ed03dSBarry Smith         } else {
359ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
360abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
361abef11f7SSatish Balay             col   = in[j]*bs;
362abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
363abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365ab26458aSBarry Smith               }
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith           }
368abef11f7SSatish Balay         }
369abef11f7SSatish Balay       }
370ab26458aSBarry Smith     }
371ab26458aSBarry Smith   }
3723a40ed3dSBarry Smith   PetscFunctionReturn(0);
373ab26458aSBarry Smith }
3740bdbc534SSatish Balay #include <math.h>
3750bdbc534SSatish Balay #define HASH_KEY 0.6180339887
376*c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
377*c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
378*c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
3795615d1e5SSatish Balay #undef __FUNC__
3800bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3810bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3820bdbc534SSatish Balay {
3830bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3840bdbc534SSatish Balay   int         ierr,i,j,row,col;
3850bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
386*c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
3870bdbc534SSatish Balay 
388*c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
389*c2760754SSatish Balay   double      tmp;
390b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
3910bdbc534SSatish Balay 
3920bdbc534SSatish Balay 
3930bdbc534SSatish Balay   PetscFunctionBegin;
3940bdbc534SSatish Balay 
3950bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
3960bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
3970bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
3980bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
3990bdbc534SSatish Balay #endif
4000bdbc534SSatish Balay       row = im[i];
401*c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4020bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4030bdbc534SSatish Balay         col = in[j];
4040bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4050bdbc534SSatish Balay         /* Look up into the Hash Table */
406*c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
407*c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4080bdbc534SSatish Balay 
409*c2760754SSatish Balay 
410*c2760754SSatish Balay         idx = h1;
411*c2760754SSatish Balay         if (HT[idx] != key) {
412*c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
413*c2760754SSatish Balay           if (idx == size) {
414*c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
415*c2760754SSatish Balay             if (idx == h1) {
416*c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
417*c2760754SSatish Balay             }
418*c2760754SSatish Balay           }
419*c2760754SSatish Balay         }
420*c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
421*c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
422*c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4230bdbc534SSatish Balay         break;
4240bdbc534SSatish Balay       }
4250bdbc534SSatish Balay     } else {
4260bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4270bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4280bdbc534SSatish Balay       } else {
4290bdbc534SSatish Balay         if (!baij->donotstash) {
4300bdbc534SSatish Balay           row = im[i];
4310bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4320bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4330bdbc534SSatish Balay           }
4340bdbc534SSatish Balay         }
4350bdbc534SSatish Balay       }
4360bdbc534SSatish Balay     }
4370bdbc534SSatish Balay   }
4380bdbc534SSatish Balay   PetscFunctionReturn(0);
4390bdbc534SSatish Balay }
4400bdbc534SSatish Balay 
4410bdbc534SSatish Balay #undef __FUNC__
4420bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4430bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4440bdbc534SSatish Balay {
4450bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4460bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4470bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
448b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
4490bdbc534SSatish Balay 
450*c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
451*c2760754SSatish Balay   double      tmp;
452b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,*value,*baij_a;
4530bdbc534SSatish Balay 
454d0a41580SSatish Balay   PetscFunctionBegin;
455d0a41580SSatish Balay 
4560bdbc534SSatish Balay   if (roworiented) {
4570bdbc534SSatish Balay     stepval = (n-1)*bs;
4580bdbc534SSatish Balay   } else {
4590bdbc534SSatish Balay     stepval = (m-1)*bs;
4600bdbc534SSatish Balay   }
4610bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4620bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4630bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4640bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4650bdbc534SSatish Balay #endif
4660bdbc534SSatish Balay     row = im[i];
467*c2760754SSatish Balay     if (row >= rstart && row < rend) {
4680bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4690bdbc534SSatish Balay         col = in[j];
4700bdbc534SSatish Balay 
4710bdbc534SSatish Balay         /* Look up into the Hash Table */
472*c2760754SSatish Balay         key = row*Nbs+col+1;
473*c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4740bdbc534SSatish Balay 
475*c2760754SSatish Balay         idx = h1;
476*c2760754SSatish Balay         if (HT[idx] != key) {
477*c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
478*c2760754SSatish Balay           if (idx == size) {
479*c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
480*c2760754SSatish Balay             if (idx == h1) {
481*c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
482*c2760754SSatish Balay             }
483*c2760754SSatish Balay           }
484*c2760754SSatish Balay         }
485*c2760754SSatish Balay         baij_a = HD[idx];
4860bdbc534SSatish Balay         if (roworiented) {
487*c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
488*c2760754SSatish Balay           value = v + (i*(stepval+bs)+j)*bs;
489fef45726SSatish Balay           if (addv == ADD_VALUES) {
490*c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
491*c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
492fef45726SSatish Balay                 baij_a[jj]  += *value++;
493b4cc0f5aSSatish Balay               }
494b4cc0f5aSSatish Balay             }
495fef45726SSatish Balay           } else {
496*c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
497*c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
498fef45726SSatish Balay                 baij_a[jj]  = *value++;
499fef45726SSatish Balay               }
500fef45726SSatish Balay             }
501fef45726SSatish Balay           }
5020bdbc534SSatish Balay         } else {
5030bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
504fef45726SSatish Balay           if (addv == ADD_VALUES) {
505b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5060bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
507fef45726SSatish Balay                 baij_a[jj]  += *value++;
508fef45726SSatish Balay               }
509fef45726SSatish Balay             }
510fef45726SSatish Balay           } else {
511fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
512fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
513fef45726SSatish Balay                 baij_a[jj]  = *value++;
514fef45726SSatish Balay               }
515b4cc0f5aSSatish Balay             }
5160bdbc534SSatish Balay           }
5170bdbc534SSatish Balay         }
5180bdbc534SSatish Balay       }
5190bdbc534SSatish Balay     } else {
5200bdbc534SSatish Balay       if (!baij->donotstash) {
5210bdbc534SSatish Balay         if (roworiented ) {
5220bdbc534SSatish Balay           row   = im[i]*bs;
5230bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5240bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5250bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5260bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5270bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5280bdbc534SSatish Balay               }
5290bdbc534SSatish Balay             }
5300bdbc534SSatish Balay           }
5310bdbc534SSatish Balay         } else {
5320bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5330bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5340bdbc534SSatish Balay             col   = in[j]*bs;
5350bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5360bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
5370bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5380bdbc534SSatish Balay               }
5390bdbc534SSatish Balay             }
5400bdbc534SSatish Balay           }
5410bdbc534SSatish Balay         }
5420bdbc534SSatish Balay       }
5430bdbc534SSatish Balay     }
5440bdbc534SSatish Balay   }
5450bdbc534SSatish Balay   PetscFunctionReturn(0);
5460bdbc534SSatish Balay }
5470bdbc534SSatish Balay #undef __FUNC__
5485615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
549ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
550d6de1c52SSatish Balay {
551d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
552d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
553d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
554d6de1c52SSatish Balay 
555d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
556a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
557a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
558d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
559d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
560d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
561a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
562a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
563d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
564d6de1c52SSatish Balay           col = idxn[j] - bscstart;
565d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
566d64ed03dSBarry Smith         } else {
567905e6a2fSBarry Smith           if (!baij->colmap) {
568905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
569905e6a2fSBarry Smith           }
570e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
571dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572d9d09a02SSatish Balay           else {
573dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
574d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
575d6de1c52SSatish Balay           }
576d6de1c52SSatish Balay         }
577d6de1c52SSatish Balay       }
578d64ed03dSBarry Smith     } else {
579a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
580d6de1c52SSatish Balay     }
581d6de1c52SSatish Balay   }
5823a40ed3dSBarry Smith   PetscFunctionReturn(0);
583d6de1c52SSatish Balay }
584d6de1c52SSatish Balay 
5855615d1e5SSatish Balay #undef __FUNC__
5865615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
587ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
588d6de1c52SSatish Balay {
589d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
590d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
591acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
592d6de1c52SSatish Balay   double     sum = 0.0;
593d6de1c52SSatish Balay   Scalar     *v;
594d6de1c52SSatish Balay 
595d64ed03dSBarry Smith   PetscFunctionBegin;
596d6de1c52SSatish Balay   if (baij->size == 1) {
597d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
598d6de1c52SSatish Balay   } else {
599d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
600d6de1c52SSatish Balay       v = amat->a;
601d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6023a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
603d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
604d6de1c52SSatish Balay #else
605d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
606d6de1c52SSatish Balay #endif
607d6de1c52SSatish Balay       }
608d6de1c52SSatish Balay       v = bmat->a;
609d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6103a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
611d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
612d6de1c52SSatish Balay #else
613d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
614d6de1c52SSatish Balay #endif
615d6de1c52SSatish Balay       }
616ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
617d6de1c52SSatish Balay       *norm = sqrt(*norm);
618d64ed03dSBarry Smith     } else {
619e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
620d6de1c52SSatish Balay     }
621d64ed03dSBarry Smith   }
6223a40ed3dSBarry Smith   PetscFunctionReturn(0);
623d6de1c52SSatish Balay }
62457b952d6SSatish Balay 
6255615d1e5SSatish Balay #undef __FUNC__
6265615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
627ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
62857b952d6SSatish Balay {
62957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
63057b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
63157b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
63257b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
63357b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
63457b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
63557b952d6SSatish Balay   InsertMode  addv;
63657b952d6SSatish Balay   Scalar      *rvalues,*svalues;
63757b952d6SSatish Balay 
638d64ed03dSBarry Smith   PetscFunctionBegin;
63957b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
640ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
64157b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
642a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
64357b952d6SSatish Balay   }
644e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
64557b952d6SSatish Balay 
64657b952d6SSatish Balay   /*  first count number of contributors to each processor */
64757b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
64857b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
64957b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
65057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
65157b952d6SSatish Balay     idx = baij->stash.idx[i];
65257b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
65357b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
65457b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
65557b952d6SSatish Balay       }
65657b952d6SSatish Balay     }
65757b952d6SSatish Balay   }
65857b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
65957b952d6SSatish Balay 
66057b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
66157b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
662ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
66357b952d6SSatish Balay   nreceives = work[rank];
664ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
66557b952d6SSatish Balay   nmax      = work[rank];
66657b952d6SSatish Balay   PetscFree(work);
66757b952d6SSatish Balay 
66857b952d6SSatish Balay   /* post receives:
66957b952d6SSatish Balay        1) each message will consist of ordered pairs
67057b952d6SSatish Balay      (global index,value) we store the global index as a double
67157b952d6SSatish Balay      to simplify the message passing.
67257b952d6SSatish Balay        2) since we don't know how long each individual message is we
67357b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
67457b952d6SSatish Balay      this is a lot of wasted space.
67557b952d6SSatish Balay 
67657b952d6SSatish Balay 
67757b952d6SSatish Balay        This could be done better.
67857b952d6SSatish Balay   */
679f8abc2e8SBarry Smith   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
680f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
68157b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
682ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
68357b952d6SSatish Balay   }
68457b952d6SSatish Balay 
68557b952d6SSatish Balay   /* do sends:
68657b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
68757b952d6SSatish Balay          the ith processor
68857b952d6SSatish Balay   */
68957b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
690d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
69157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
69257b952d6SSatish Balay   starts[0] = 0;
69357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
69457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
69557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
69657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
69757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
69857b952d6SSatish Balay   }
69957b952d6SSatish Balay   PetscFree(owner);
70057b952d6SSatish Balay   starts[0] = 0;
70157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
70257b952d6SSatish Balay   count = 0;
70357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
70457b952d6SSatish Balay     if (procs[i]) {
705ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
70657b952d6SSatish Balay     }
70757b952d6SSatish Balay   }
70857b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
70957b952d6SSatish Balay 
71057b952d6SSatish Balay   /* Free cache space */
711cd1fa5fbSBarry Smith   PLogInfo(0,"[%d]MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",rank,baij->stash.n);
71257b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
71357b952d6SSatish Balay 
71457b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
71557b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
71657b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
71757b952d6SSatish Balay   baij->rmax       = nmax;
71857b952d6SSatish Balay 
7193a40ed3dSBarry Smith   PetscFunctionReturn(0);
72057b952d6SSatish Balay }
721bd7f49f5SSatish Balay 
722fef45726SSatish Balay /*
723fef45726SSatish Balay   Creates the hash table, and sets the table
724fef45726SSatish Balay   This table is created only once.
725fef45726SSatish Balay   If new entried need to be added to the matrix
726fef45726SSatish Balay   then the hash table has to be destroyed and
727fef45726SSatish Balay   recreated.
728fef45726SSatish Balay */
729fef45726SSatish Balay #undef __FUNC__
730fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
731d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
732596b8d2eSBarry Smith {
733596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
734596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
735596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7360bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7370bdbc534SSatish Balay   int         size,ct=0,max1=0,max2=0,bs2=baij->bs2,rstart=baij->rstart;
7380bdbc534SSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col;
739fef45726SSatish Balay   int         *HT,key;
7400bdbc534SSatish Balay   Scalar      **HD;
741*c2760754SSatish Balay   double      tmp;
742fef45726SSatish Balay 
743d64ed03dSBarry Smith   PetscFunctionBegin;
7440bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7450bdbc534SSatish Balay   size = baij->ht_size;
746fef45726SSatish Balay 
7470bdbc534SSatish Balay   if (baij->ht) {
7480bdbc534SSatish Balay     PetscFunctionReturn(0);
749596b8d2eSBarry Smith   }
7500bdbc534SSatish Balay 
751fef45726SSatish Balay   /* Allocate Memory for Hash Table */
752b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
753b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
754b9e4cc15SSatish Balay   HD = baij->hd;
755a07cd24cSSatish Balay   HT = baij->ht;
756b9e4cc15SSatish Balay 
757b9e4cc15SSatish Balay 
758*c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
7590bdbc534SSatish Balay 
760596b8d2eSBarry Smith 
761596b8d2eSBarry Smith   /* Loop Over A */
7620bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
763596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7640bdbc534SSatish Balay       row = i+rstart;
7650bdbc534SSatish Balay       col = aj[j]+cstart;
766596b8d2eSBarry Smith 
7670bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
7680bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
769*c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7700bdbc534SSatish Balay 
7710bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7720bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7730bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7740bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
775596b8d2eSBarry Smith           break;
776596b8d2eSBarry Smith         } else ct++;
777596b8d2eSBarry Smith       }
778bd7f49f5SSatish Balay       if (k> max1) max1 = k;
779596b8d2eSBarry Smith     }
780596b8d2eSBarry Smith   }
781596b8d2eSBarry Smith   /* Loop Over B */
7820bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
783596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7840bdbc534SSatish Balay       row = i+rstart;
7850bdbc534SSatish Balay       col = garray[bj[j]];
7860bdbc534SSatish Balay       key = row*baij->Nbs + col + 1;
7870bdbc534SSatish Balay       /* printf("[%d]row,col,key = %2d,%2d,%5.2f\n",PetscGlobalRank,row,col,key); */
788*c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7890bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7900bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7910bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7920bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
793596b8d2eSBarry Smith           break;
794596b8d2eSBarry Smith         } else ct++;
795596b8d2eSBarry Smith       }
796bd7f49f5SSatish Balay       if (k> max2) max2 = k;
797596b8d2eSBarry Smith     }
798596b8d2eSBarry Smith   }
799596b8d2eSBarry Smith 
800596b8d2eSBarry Smith   /* Print Summary */
801*c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
802596b8d2eSBarry Smith     if (HT[i]) {j++;}
803596b8d2eSBarry Smith 
8040bdbc534SSatish Balay   /*printf("[%d] fact = %3.2f max1 = %5d max2 = %5d Size %5d - Searches %5d Avg %5.2f Keys %5d\n",
8050bdbc534SSatish Balay          PetscGlobalRank,factor,max1,max2,size,ct+j,((float)ct+j),j); */
8060bdbc534SSatish Balay   fflush(stdout);
8073a40ed3dSBarry Smith   PetscFunctionReturn(0);
808596b8d2eSBarry Smith }
80957b952d6SSatish Balay 
8105615d1e5SSatish Balay #undef __FUNC__
8115615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
812ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
81357b952d6SSatish Balay {
81457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
81557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
81657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
817596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
81857b952d6SSatish Balay   Scalar      *values,val;
819e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
82057b952d6SSatish Balay 
821d64ed03dSBarry Smith   PetscFunctionBegin;
82257b952d6SSatish Balay   /*  wait on receives */
82357b952d6SSatish Balay   while (count) {
824ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
82557b952d6SSatish Balay     /* unpack receives into our local space */
82657b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
827ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
82857b952d6SSatish Balay     n = n/3;
82957b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
83057b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
83157b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
83257b952d6SSatish Balay       val = values[3*i+2];
83357b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
83457b952d6SSatish Balay         col -= baij->cstart*bs;
8356fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
836d64ed03dSBarry Smith       } else {
83757b952d6SSatish Balay         if (mat->was_assembled) {
838905e6a2fSBarry Smith           if (!baij->colmap) {
839905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
840905e6a2fSBarry Smith           }
841a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
84257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
84357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
84457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
84557b952d6SSatish Balay           }
84657b952d6SSatish Balay         }
8476fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
84857b952d6SSatish Balay       }
84957b952d6SSatish Balay     }
85057b952d6SSatish Balay     count--;
85157b952d6SSatish Balay   }
85257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
85357b952d6SSatish Balay 
85457b952d6SSatish Balay   /* wait on sends */
85557b952d6SSatish Balay   if (baij->nsends) {
856d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
857ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
85857b952d6SSatish Balay     PetscFree(send_status);
85957b952d6SSatish Balay   }
86057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
86157b952d6SSatish Balay 
86257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
86357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
86457b952d6SSatish Balay 
86557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
86657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
867ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
86857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
86957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
87057b952d6SSatish Balay   }
87157b952d6SSatish Balay 
8726d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
87357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
87457b952d6SSatish Balay   }
87557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
87657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
87757b952d6SSatish Balay 
878596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
87944e6884eSSatish Balay   if (flg && !baij->ht && mode== MAT_FINAL_ASSEMBLY) {
880a07cd24cSSatish Balay     double fact = 1.39;
881fef45726SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-use_hash",&fact,&flg); CHKERRQ(ierr);
882a74b47caSSatish Balay     if (fact <= 1.0) fact = 1.39;
883a74b47caSSatish Balay     PLogInfo(0,"[%d]MatAssemblyEnd_MPIBAIJ:Hash table Factor used %5.2f\n",PetscGlobalRank,fact);
884fef45726SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,fact); CHKERRQ(ierr);
8850bdbc534SSatish Balay     mat->ops.setvalues        = MatSetValues_MPIBAIJ_HT;
8860bdbc534SSatish Balay     mat->ops.setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
887bd7f49f5SSatish Balay   }
88857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
8893a40ed3dSBarry Smith   PetscFunctionReturn(0);
89057b952d6SSatish Balay }
89157b952d6SSatish Balay 
8925615d1e5SSatish Balay #undef __FUNC__
8935615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
89457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
89557b952d6SSatish Balay {
89657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
89757b952d6SSatish Balay   int          ierr;
89857b952d6SSatish Balay 
899d64ed03dSBarry Smith   PetscFunctionBegin;
90057b952d6SSatish Balay   if (baij->size == 1) {
90157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
902a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9033a40ed3dSBarry Smith   PetscFunctionReturn(0);
90457b952d6SSatish Balay }
90557b952d6SSatish Balay 
9065615d1e5SSatish Balay #undef __FUNC__
9075615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
90857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
90957b952d6SSatish Balay {
91057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
911cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
91257b952d6SSatish Balay   FILE         *fd;
91357b952d6SSatish Balay   ViewerType   vtype;
91457b952d6SSatish Balay 
915d64ed03dSBarry Smith   PetscFunctionBegin;
91657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
91757b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
91857b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
919639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9204e220ebcSLois Curfman McInnes       MatInfo info;
92157b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
92257b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
9234e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
92457b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
92557b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9264e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
9274e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
9284e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
9294e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
9304e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
9314e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
93257b952d6SSatish Balay       fflush(fd);
93357b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
93457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
9353a40ed3dSBarry Smith       PetscFunctionReturn(0);
936d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
937bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
9383a40ed3dSBarry Smith       PetscFunctionReturn(0);
93957b952d6SSatish Balay     }
94057b952d6SSatish Balay   }
94157b952d6SSatish Balay 
94257b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
94357b952d6SSatish Balay     Draw       draw;
94457b952d6SSatish Balay     PetscTruth isnull;
94557b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
9463a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
94757b952d6SSatish Balay   }
94857b952d6SSatish Balay 
94957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
95057b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
95157b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
95257b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
95357b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
95457b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
95557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
95657b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
95757b952d6SSatish Balay     fflush(fd);
95857b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
959d64ed03dSBarry Smith   } else {
96057b952d6SSatish Balay     int size = baij->size;
96157b952d6SSatish Balay     rank = baij->rank;
96257b952d6SSatish Balay     if (size == 1) {
96357b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
964d64ed03dSBarry Smith     } else {
96557b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
96657b952d6SSatish Balay       Mat         A;
96757b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
96857b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
96957b952d6SSatish Balay       int         mbs=baij->mbs;
97057b952d6SSatish Balay       Scalar      *a;
97157b952d6SSatish Balay 
97257b952d6SSatish Balay       if (!rank) {
97355843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
974d64ed03dSBarry Smith       } else {
97555843e3eSBarry Smith         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
97657b952d6SSatish Balay       }
97757b952d6SSatish Balay       PLogObjectParent(mat,A);
97857b952d6SSatish Balay 
97957b952d6SSatish Balay       /* copy over the A part */
98057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
98157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
98257b952d6SSatish Balay       row = baij->rstart;
98357b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
98457b952d6SSatish Balay 
98557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
98657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
98757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
98857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
98957b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
99057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
991cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
992cee3aa6bSSatish Balay             col++; a += bs;
99357b952d6SSatish Balay           }
99457b952d6SSatish Balay         }
99557b952d6SSatish Balay       }
99657b952d6SSatish Balay       /* copy over the B part */
99757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
99857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
99957b952d6SSatish Balay       row = baij->rstart*bs;
100057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
100157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
100257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
100357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
100457b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
100557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
1006cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1007cee3aa6bSSatish Balay             col++; a += bs;
100857b952d6SSatish Balay           }
100957b952d6SSatish Balay         }
101057b952d6SSatish Balay       }
101157b952d6SSatish Balay       PetscFree(rvals);
10126d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10136d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
101455843e3eSBarry Smith       /*
101555843e3eSBarry Smith          Everyone has to call to draw the matrix since the graphics waits are
101655843e3eSBarry Smith          synchronized across all processors that share the Draw object
101755843e3eSBarry Smith       */
101855843e3eSBarry Smith       if (!rank || vtype == DRAW_VIEWER) {
101957b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
102057b952d6SSatish Balay       }
102157b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
102257b952d6SSatish Balay     }
102357b952d6SSatish Balay   }
10243a40ed3dSBarry Smith   PetscFunctionReturn(0);
102557b952d6SSatish Balay }
102657b952d6SSatish Balay 
102757b952d6SSatish Balay 
102857b952d6SSatish Balay 
10295615d1e5SSatish Balay #undef __FUNC__
10305615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1031ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
103257b952d6SSatish Balay {
103357b952d6SSatish Balay   Mat         mat = (Mat) obj;
103457b952d6SSatish Balay   int         ierr;
103557b952d6SSatish Balay   ViewerType  vtype;
103657b952d6SSatish Balay 
1037d64ed03dSBarry Smith   PetscFunctionBegin;
103857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
103957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
104057b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
104157b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
10423a40ed3dSBarry Smith   } else if (vtype == BINARY_FILE_VIEWER) {
10433a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
104457b952d6SSatish Balay   }
10453a40ed3dSBarry Smith   PetscFunctionReturn(0);
104657b952d6SSatish Balay }
104757b952d6SSatish Balay 
10485615d1e5SSatish Balay #undef __FUNC__
10495615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1050ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
105179bdfe76SSatish Balay {
105279bdfe76SSatish Balay   Mat         mat = (Mat) obj;
105379bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
105479bdfe76SSatish Balay   int         ierr;
105579bdfe76SSatish Balay 
1056d64ed03dSBarry Smith   PetscFunctionBegin;
10573a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
105879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
105979bdfe76SSatish Balay #endif
106079bdfe76SSatish Balay 
106183e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
106279bdfe76SSatish Balay   PetscFree(baij->rowners);
106379bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
106479bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
106579bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
106679bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
106779bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
106879bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
106979bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
107030793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1071b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
107279bdfe76SSatish Balay   PetscFree(baij);
107379bdfe76SSatish Balay   PLogObjectDestroy(mat);
107479bdfe76SSatish Balay   PetscHeaderDestroy(mat);
10753a40ed3dSBarry Smith   PetscFunctionReturn(0);
107679bdfe76SSatish Balay }
107779bdfe76SSatish Balay 
10785615d1e5SSatish Balay #undef __FUNC__
10795615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1080ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1081cee3aa6bSSatish Balay {
1082cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
108347b4a8eaSLois Curfman McInnes   int         ierr, nt;
1084cee3aa6bSSatish Balay 
1085d64ed03dSBarry Smith   PetscFunctionBegin;
1086c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
108747b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1088a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
108947b4a8eaSLois Curfman McInnes   }
1090c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
109147b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1092a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
109347b4a8eaSLois Curfman McInnes   }
109443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1095cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
109643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1097cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
109843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
10993a40ed3dSBarry Smith   PetscFunctionReturn(0);
1100cee3aa6bSSatish Balay }
1101cee3aa6bSSatish Balay 
11025615d1e5SSatish Balay #undef __FUNC__
11035615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1104ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1105cee3aa6bSSatish Balay {
1106cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1107cee3aa6bSSatish Balay   int        ierr;
1108d64ed03dSBarry Smith 
1109d64ed03dSBarry Smith   PetscFunctionBegin;
111043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1111cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
111243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1113cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
11143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1115cee3aa6bSSatish Balay }
1116cee3aa6bSSatish Balay 
11175615d1e5SSatish Balay #undef __FUNC__
11185615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1119ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1120cee3aa6bSSatish Balay {
1121cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1122cee3aa6bSSatish Balay   int         ierr;
1123cee3aa6bSSatish Balay 
1124d64ed03dSBarry Smith   PetscFunctionBegin;
1125cee3aa6bSSatish Balay   /* do nondiagonal part */
1126cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1127cee3aa6bSSatish Balay   /* send it on its way */
1128537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1129cee3aa6bSSatish Balay   /* do local part */
1130cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1131cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1132cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1133cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1134639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1136cee3aa6bSSatish Balay }
1137cee3aa6bSSatish Balay 
11385615d1e5SSatish Balay #undef __FUNC__
11395615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1140ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141cee3aa6bSSatish Balay {
1142cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143cee3aa6bSSatish Balay   int         ierr;
1144cee3aa6bSSatish Balay 
1145d64ed03dSBarry Smith   PetscFunctionBegin;
1146cee3aa6bSSatish Balay   /* do nondiagonal part */
1147cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1148cee3aa6bSSatish Balay   /* send it on its way */
1149537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1150cee3aa6bSSatish Balay   /* do local part */
1151cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1152cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1153cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1154cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1155537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
11563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1157cee3aa6bSSatish Balay }
1158cee3aa6bSSatish Balay 
1159cee3aa6bSSatish Balay /*
1160cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1161cee3aa6bSSatish Balay    diagonal block
1162cee3aa6bSSatish Balay */
11635615d1e5SSatish Balay #undef __FUNC__
11645615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1165ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1166cee3aa6bSSatish Balay {
1167cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
11683a40ed3dSBarry Smith   int         ierr;
1169d64ed03dSBarry Smith 
1170d64ed03dSBarry Smith   PetscFunctionBegin;
1171a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
11723a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
11733a40ed3dSBarry Smith   PetscFunctionReturn(0);
1174cee3aa6bSSatish Balay }
1175cee3aa6bSSatish Balay 
11765615d1e5SSatish Balay #undef __FUNC__
11775615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1178ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1179cee3aa6bSSatish Balay {
1180cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1181cee3aa6bSSatish Balay   int         ierr;
1182d64ed03dSBarry Smith 
1183d64ed03dSBarry Smith   PetscFunctionBegin;
1184cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1185cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
11863a40ed3dSBarry Smith   PetscFunctionReturn(0);
1187cee3aa6bSSatish Balay }
1188026e39d0SSatish Balay 
11895615d1e5SSatish Balay #undef __FUNC__
11905615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1191ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
119257b952d6SSatish Balay {
119357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1194d64ed03dSBarry Smith 
1195d64ed03dSBarry Smith   PetscFunctionBegin;
1196bd7f49f5SSatish Balay   if (m) *m = mat->M;
1197bd7f49f5SSatish Balay   if (n) *n = mat->N;
11983a40ed3dSBarry Smith   PetscFunctionReturn(0);
119957b952d6SSatish Balay }
120057b952d6SSatish Balay 
12015615d1e5SSatish Balay #undef __FUNC__
12025615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1203ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
120457b952d6SSatish Balay {
120557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1206d64ed03dSBarry Smith 
1207d64ed03dSBarry Smith   PetscFunctionBegin;
120857b952d6SSatish Balay   *m = mat->m; *n = mat->N;
12093a40ed3dSBarry Smith   PetscFunctionReturn(0);
121057b952d6SSatish Balay }
121157b952d6SSatish Balay 
12125615d1e5SSatish Balay #undef __FUNC__
12135615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1214ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
121557b952d6SSatish Balay {
121657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1217d64ed03dSBarry Smith 
1218d64ed03dSBarry Smith   PetscFunctionBegin;
121957b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
12203a40ed3dSBarry Smith   PetscFunctionReturn(0);
122157b952d6SSatish Balay }
122257b952d6SSatish Balay 
1223acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1224acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1225acdf5bf4SSatish Balay 
12265615d1e5SSatish Balay #undef __FUNC__
12275615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1228acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1229acdf5bf4SSatish Balay {
1230acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1231acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1232acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1233d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1234d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1235acdf5bf4SSatish Balay 
1236d64ed03dSBarry Smith   PetscFunctionBegin;
1237a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1238acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1239acdf5bf4SSatish Balay 
1240acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1241acdf5bf4SSatish Balay     /*
1242acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1243acdf5bf4SSatish Balay     */
1244acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1245bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1246bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1247acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1248acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1249acdf5bf4SSatish Balay     }
1250acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1251acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1252acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1253acdf5bf4SSatish Balay   }
1254acdf5bf4SSatish Balay 
1255a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1256d9d09a02SSatish Balay   lrow = row - brstart;
1257acdf5bf4SSatish Balay 
1258acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1259acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1260acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1261acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1262acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1263acdf5bf4SSatish Balay   nztot = nzA + nzB;
1264acdf5bf4SSatish Balay 
1265acdf5bf4SSatish Balay   cmap  = mat->garray;
1266acdf5bf4SSatish Balay   if (v  || idx) {
1267acdf5bf4SSatish Balay     if (nztot) {
1268acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1269acdf5bf4SSatish Balay       int imark = -1;
1270acdf5bf4SSatish Balay       if (v) {
1271acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1272acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1273d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1274acdf5bf4SSatish Balay           else break;
1275acdf5bf4SSatish Balay         }
1276acdf5bf4SSatish Balay         imark = i;
1277acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1278acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1279acdf5bf4SSatish Balay       }
1280acdf5bf4SSatish Balay       if (idx) {
1281acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1282acdf5bf4SSatish Balay         if (imark > -1) {
1283acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1284bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1285acdf5bf4SSatish Balay           }
1286acdf5bf4SSatish Balay         } else {
1287acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1288d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1289d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1290acdf5bf4SSatish Balay             else break;
1291acdf5bf4SSatish Balay           }
1292acdf5bf4SSatish Balay           imark = i;
1293acdf5bf4SSatish Balay         }
1294d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1295d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1296acdf5bf4SSatish Balay       }
1297d64ed03dSBarry Smith     } else {
1298d212a18eSSatish Balay       if (idx) *idx = 0;
1299d212a18eSSatish Balay       if (v)   *v   = 0;
1300d212a18eSSatish Balay     }
1301acdf5bf4SSatish Balay   }
1302acdf5bf4SSatish Balay   *nz = nztot;
1303acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1304acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
13053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1306acdf5bf4SSatish Balay }
1307acdf5bf4SSatish Balay 
13085615d1e5SSatish Balay #undef __FUNC__
13095615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1310acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1311acdf5bf4SSatish Balay {
1312acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1313d64ed03dSBarry Smith 
1314d64ed03dSBarry Smith   PetscFunctionBegin;
1315acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1316a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1317acdf5bf4SSatish Balay   }
1318acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1320acdf5bf4SSatish Balay }
1321acdf5bf4SSatish Balay 
13225615d1e5SSatish Balay #undef __FUNC__
13235615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1324ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13255a838052SSatish Balay {
13265a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1327d64ed03dSBarry Smith 
1328d64ed03dSBarry Smith   PetscFunctionBegin;
13295a838052SSatish Balay   *bs = baij->bs;
13303a40ed3dSBarry Smith   PetscFunctionReturn(0);
13315a838052SSatish Balay }
13325a838052SSatish Balay 
13335615d1e5SSatish Balay #undef __FUNC__
13345615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1335ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
133658667388SSatish Balay {
133758667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
133858667388SSatish Balay   int         ierr;
1339d64ed03dSBarry Smith 
1340d64ed03dSBarry Smith   PetscFunctionBegin;
134158667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
134258667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
13433a40ed3dSBarry Smith   PetscFunctionReturn(0);
134458667388SSatish Balay }
13450ac07820SSatish Balay 
13465615d1e5SSatish Balay #undef __FUNC__
13475615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1348ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13490ac07820SSatish Balay {
13504e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13514e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13527d57db60SLois Curfman McInnes   int         ierr;
13537d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13540ac07820SSatish Balay 
1355d64ed03dSBarry Smith   PetscFunctionBegin;
13564e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13574e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
13584e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
13594e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
13604e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
13610ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13624e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13634e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13644e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
13654e220ebcSLois Curfman McInnes     info->memory       = isend[3];
13664e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
13670ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1368ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
13694e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13704e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13714e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13724e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13734e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13740ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1375ca161407SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
13764e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13774e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13784e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13794e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13804e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13810ac07820SSatish Balay   }
13825a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
13835a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
13845a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
13855a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
13864e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
13874e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
13884e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
13893a40ed3dSBarry Smith   PetscFunctionReturn(0);
13900ac07820SSatish Balay }
13910ac07820SSatish Balay 
13925615d1e5SSatish Balay #undef __FUNC__
13935615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1394ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
139558667388SSatish Balay {
139658667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
139758667388SSatish Balay 
1398d64ed03dSBarry Smith   PetscFunctionBegin;
139958667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
140058667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14016da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1402c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
140396854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1404c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1405b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1406b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1407b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1408aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
140958667388SSatish Balay         MatSetOption(a->A,op);
141058667388SSatish Balay         MatSetOption(a->B,op);
1411b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14126da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
141358667388SSatish Balay              op == MAT_SYMMETRIC ||
141458667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
141558667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
141658667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
141758667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
141858667388SSatish Balay     a->roworiented = 0;
141958667388SSatish Balay     MatSetOption(a->A,op);
142058667388SSatish Balay     MatSetOption(a->B,op);
14212b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
142290f02eecSBarry Smith     a->donotstash = 1;
1423d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1424d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1425d64ed03dSBarry Smith   } else {
1426d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1427d64ed03dSBarry Smith   }
14283a40ed3dSBarry Smith   PetscFunctionReturn(0);
142958667388SSatish Balay }
143058667388SSatish Balay 
14315615d1e5SSatish Balay #undef __FUNC__
14325615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1433ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
14340ac07820SSatish Balay {
14350ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
14360ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14370ac07820SSatish Balay   Mat        B;
14380ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
14390ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
14400ac07820SSatish Balay   Scalar     *a;
14410ac07820SSatish Balay 
1442d64ed03dSBarry Smith   PetscFunctionBegin;
1443a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
14440ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14450ac07820SSatish Balay   CHKERRQ(ierr);
14460ac07820SSatish Balay 
14470ac07820SSatish Balay   /* copy over the A part */
14480ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14490ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14500ac07820SSatish Balay   row = baij->rstart;
14510ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
14520ac07820SSatish Balay 
14530ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14540ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14550ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14560ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14570ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14580ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14590ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14600ac07820SSatish Balay         col++; a += bs;
14610ac07820SSatish Balay       }
14620ac07820SSatish Balay     }
14630ac07820SSatish Balay   }
14640ac07820SSatish Balay   /* copy over the B part */
14650ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
14660ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14670ac07820SSatish Balay   row = baij->rstart*bs;
14680ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14690ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14700ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14710ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14720ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
14730ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14740ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14750ac07820SSatish Balay         col++; a += bs;
14760ac07820SSatish Balay       }
14770ac07820SSatish Balay     }
14780ac07820SSatish Balay   }
14790ac07820SSatish Balay   PetscFree(rvals);
14800ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14810ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
14820ac07820SSatish Balay 
14830ac07820SSatish Balay   if (matout != PETSC_NULL) {
14840ac07820SSatish Balay     *matout = B;
14850ac07820SSatish Balay   } else {
14860ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
14870ac07820SSatish Balay     PetscFree(baij->rowners);
14880ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
14890ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
14900ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
14910ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
14920ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
14930ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
14940ac07820SSatish Balay     PetscFree(baij);
1495f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
14960ac07820SSatish Balay     PetscHeaderDestroy(B);
14970ac07820SSatish Balay   }
14983a40ed3dSBarry Smith   PetscFunctionReturn(0);
14990ac07820SSatish Balay }
15000e95ebc0SSatish Balay 
15015615d1e5SSatish Balay #undef __FUNC__
15025615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
15030e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
15040e95ebc0SSatish Balay {
15050e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
15060e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
15070e95ebc0SSatish Balay   int ierr,s1,s2,s3;
15080e95ebc0SSatish Balay 
1509d64ed03dSBarry Smith   PetscFunctionBegin;
15100e95ebc0SSatish Balay   if (ll)  {
15110e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
15120e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1513a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
15140e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
15150e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
15160e95ebc0SSatish Balay   }
1517a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
15183a40ed3dSBarry Smith   PetscFunctionReturn(0);
15190e95ebc0SSatish Balay }
15200e95ebc0SSatish Balay 
15215615d1e5SSatish Balay #undef __FUNC__
15225615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1523ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
15240ac07820SSatish Balay {
15250ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
15260ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1527a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
15280ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
15290ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1530a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
15310ac07820SSatish Balay   MPI_Comm       comm = A->comm;
15320ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
15330ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
15340ac07820SSatish Balay   IS             istmp;
15350ac07820SSatish Balay 
1536d64ed03dSBarry Smith   PetscFunctionBegin;
15370ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
15380ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
15390ac07820SSatish Balay 
15400ac07820SSatish Balay   /*  first count number of contributors to each processor */
15410ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
15420ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
15430ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
15440ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15450ac07820SSatish Balay     idx = rows[i];
15460ac07820SSatish Balay     found = 0;
15470ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
15480ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
15490ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
15500ac07820SSatish Balay       }
15510ac07820SSatish Balay     }
1552a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
15530ac07820SSatish Balay   }
15540ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
15550ac07820SSatish Balay 
15560ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
15570ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1558ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
15590ac07820SSatish Balay   nrecvs = work[rank];
1560ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
15610ac07820SSatish Balay   nmax   = work[rank];
15620ac07820SSatish Balay   PetscFree(work);
15630ac07820SSatish Balay 
15640ac07820SSatish Balay   /* post receives:   */
1565d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1566d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
15670ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1568ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
15690ac07820SSatish Balay   }
15700ac07820SSatish Balay 
15710ac07820SSatish Balay   /* do sends:
15720ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
15730ac07820SSatish Balay      the ith processor
15740ac07820SSatish Balay   */
15750ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1576ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
15770ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
15780ac07820SSatish Balay   starts[0] = 0;
15790ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15800ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15810ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
15820ac07820SSatish Balay   }
15830ac07820SSatish Balay   ISRestoreIndices(is,&rows);
15840ac07820SSatish Balay 
15850ac07820SSatish Balay   starts[0] = 0;
15860ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
15870ac07820SSatish Balay   count = 0;
15880ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
15890ac07820SSatish Balay     if (procs[i]) {
1590ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
15910ac07820SSatish Balay     }
15920ac07820SSatish Balay   }
15930ac07820SSatish Balay   PetscFree(starts);
15940ac07820SSatish Balay 
15950ac07820SSatish Balay   base = owners[rank]*bs;
15960ac07820SSatish Balay 
15970ac07820SSatish Balay   /*  wait on receives */
15980ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
15990ac07820SSatish Balay   source = lens + nrecvs;
16000ac07820SSatish Balay   count  = nrecvs; slen = 0;
16010ac07820SSatish Balay   while (count) {
1602ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
16030ac07820SSatish Balay     /* unpack receives into our local space */
1604ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
16050ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
16060ac07820SSatish Balay     lens[imdex]  = n;
16070ac07820SSatish Balay     slen += n;
16080ac07820SSatish Balay     count--;
16090ac07820SSatish Balay   }
16100ac07820SSatish Balay   PetscFree(recv_waits);
16110ac07820SSatish Balay 
16120ac07820SSatish Balay   /* move the data into the send scatter */
16130ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
16140ac07820SSatish Balay   count = 0;
16150ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
16160ac07820SSatish Balay     values = rvalues + i*nmax;
16170ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
16180ac07820SSatish Balay       lrows[count++] = values[j] - base;
16190ac07820SSatish Balay     }
16200ac07820SSatish Balay   }
16210ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
16220ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
16230ac07820SSatish Balay 
16240ac07820SSatish Balay   /* actually zap the local rows */
1625029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
16260ac07820SSatish Balay   PLogObjectParent(A,istmp);
1627a07cd24cSSatish Balay 
1628a07cd24cSSatish Balay   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
16290ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
16300ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
16310ac07820SSatish Balay 
1632a07cd24cSSatish Balay   if (diag) {
1633a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1634a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1635a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1636a07cd24cSSatish Balay     }
1637a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1638a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1639a07cd24cSSatish Balay   }
1640a07cd24cSSatish Balay   PetscFree(lrows);
1641a07cd24cSSatish Balay 
16420ac07820SSatish Balay   /* wait on sends */
16430ac07820SSatish Balay   if (nsends) {
1644d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1645ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
16460ac07820SSatish Balay     PetscFree(send_status);
16470ac07820SSatish Balay   }
16480ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
16490ac07820SSatish Balay 
16503a40ed3dSBarry Smith   PetscFunctionReturn(0);
16510ac07820SSatish Balay }
1652ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
16535615d1e5SSatish Balay #undef __FUNC__
16545615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1655ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1656ba4ca20aSSatish Balay {
1657ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
16583a40ed3dSBarry Smith   int         ierr;
1659ba4ca20aSSatish Balay 
1660d64ed03dSBarry Smith   PetscFunctionBegin;
16613a40ed3dSBarry Smith   if (!a->rank) {
16623a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
16633a40ed3dSBarry Smith   }
16643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1665ba4ca20aSSatish Balay }
16660ac07820SSatish Balay 
16675615d1e5SSatish Balay #undef __FUNC__
16685615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1669ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1670bb5a7306SBarry Smith {
1671bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1672bb5a7306SBarry Smith   int         ierr;
1673d64ed03dSBarry Smith 
1674d64ed03dSBarry Smith   PetscFunctionBegin;
1675bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
16763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1677bb5a7306SBarry Smith }
1678bb5a7306SBarry Smith 
16790ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
16800ac07820SSatish Balay 
168179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
168279bdfe76SSatish Balay static struct _MatOps MatOps = {
1683bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
16844c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
16854c50302cSBarry Smith   0,0,0,0,
16860ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
16870e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
168858667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
16894c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
16904c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
16914c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
169294a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1693d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1694ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1695bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1696ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
169779bdfe76SSatish Balay 
169879bdfe76SSatish Balay 
16995615d1e5SSatish Balay #undef __FUNC__
17005615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
170179bdfe76SSatish Balay /*@C
170279bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
170379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
170479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
170579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
170679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
170779bdfe76SSatish Balay 
170879bdfe76SSatish Balay    Input Parameters:
170979bdfe76SSatish Balay .  comm - MPI communicator
171079bdfe76SSatish Balay .  bs   - size of blockk
171179bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
171292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
171392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
171492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
171592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
171692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
171779bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
171892e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
171979bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
172079bdfe76SSatish Balay            submatrix  (same for all local rows)
172192e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
172292e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
172392e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
172492e8d321SLois Curfman McInnes            it is zero.
172592e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
172679bdfe76SSatish Balay            submatrix (same for all local rows).
172792e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
172892e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
172992e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
173079bdfe76SSatish Balay 
173179bdfe76SSatish Balay    Output Parameter:
173279bdfe76SSatish Balay .  A - the matrix
173379bdfe76SSatish Balay 
173479bdfe76SSatish Balay    Notes:
173579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
173679bdfe76SSatish Balay    (possibly both).
173779bdfe76SSatish Balay 
173879bdfe76SSatish Balay    Storage Information:
173979bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
174079bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
174179bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
174279bdfe76SSatish Balay    local matrix (a rectangular submatrix).
174379bdfe76SSatish Balay 
174479bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
174579bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
174679bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
174779bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
174879bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
174979bdfe76SSatish Balay 
175079bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
175179bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
175279bdfe76SSatish Balay 
175379bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
175479bdfe76SSatish Balay $         -------------------
175579bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
175679bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
175779bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
175879bdfe76SSatish Balay $         -------------------
175979bdfe76SSatish Balay $
176079bdfe76SSatish Balay 
176179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
176279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
176379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
176457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
176579bdfe76SSatish Balay 
1766d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1767d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
176879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
176992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
177092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
17716da5968aSLois Curfman McInnes    matrices.
177279bdfe76SSatish Balay 
177392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
177479bdfe76SSatish Balay 
177579bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
177679bdfe76SSatish Balay @*/
177779bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
177879bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
177979bdfe76SSatish Balay {
178079bdfe76SSatish Balay   Mat          B;
178179bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
17824c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
178379bdfe76SSatish Balay 
1784d64ed03dSBarry Smith   PetscFunctionBegin;
1785a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
17863914022bSBarry Smith 
17873914022bSBarry Smith   MPI_Comm_size(comm,&size);
17883914022bSBarry Smith   if (size == 1) {
17893914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
17903914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
17913914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
17923a40ed3dSBarry Smith     PetscFunctionReturn(0);
17933914022bSBarry Smith   }
17943914022bSBarry Smith 
179579bdfe76SSatish Balay   *A = 0;
1796d4bb536fSBarry Smith   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
179779bdfe76SSatish Balay   PLogObjectCreate(B);
179879bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
179979bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
180079bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
18014c50302cSBarry Smith 
180279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
180379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
180490f02eecSBarry Smith   B->mapping    = 0;
180579bdfe76SSatish Balay   B->factor     = 0;
180679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
180779bdfe76SSatish Balay 
1808e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
180979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
181079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
181179bdfe76SSatish Balay 
1812d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1813a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1814d64ed03dSBarry Smith   }
1815a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1816a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1817a8c6a408SBarry Smith   }
1818a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1819a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1820a8c6a408SBarry Smith   }
1821cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1822cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
182379bdfe76SSatish Balay 
182479bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
182579bdfe76SSatish Balay     work[0] = m; work[1] = n;
182679bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
1827ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
182879bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
182979bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
183079bdfe76SSatish Balay   }
183179bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
183279bdfe76SSatish Balay     Mbs = M/bs;
1833a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
183479bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
183579bdfe76SSatish Balay     m   = mbs*bs;
183679bdfe76SSatish Balay   }
183779bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
183879bdfe76SSatish Balay     Nbs = N/bs;
1839a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
184079bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
184179bdfe76SSatish Balay     n   = nbs*bs;
184279bdfe76SSatish Balay   }
1843a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
1844a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1845a8c6a408SBarry Smith   }
184679bdfe76SSatish Balay 
184779bdfe76SSatish Balay   b->m = m; B->m = m;
184879bdfe76SSatish Balay   b->n = n; B->n = n;
184979bdfe76SSatish Balay   b->N = N; B->N = N;
185079bdfe76SSatish Balay   b->M = M; B->M = M;
185179bdfe76SSatish Balay   b->bs  = bs;
185279bdfe76SSatish Balay   b->bs2 = bs*bs;
185379bdfe76SSatish Balay   b->mbs = mbs;
185479bdfe76SSatish Balay   b->nbs = nbs;
185579bdfe76SSatish Balay   b->Mbs = Mbs;
185679bdfe76SSatish Balay   b->Nbs = Nbs;
185779bdfe76SSatish Balay 
185879bdfe76SSatish Balay   /* build local table of row and column ownerships */
185979bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1860f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
18610ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
1862ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
186379bdfe76SSatish Balay   b->rowners[0] = 0;
186479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
186579bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
186679bdfe76SSatish Balay   }
186779bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
186879bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
18694fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
18704fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
18714fa0d573SSatish Balay 
1872ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
187379bdfe76SSatish Balay   b->cowners[0] = 0;
187479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
187579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
187679bdfe76SSatish Balay   }
187779bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
187879bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
18794fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
18804fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
188179bdfe76SSatish Balay 
1882a07cd24cSSatish Balay 
188379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1884029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
188579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
188679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1887029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
188879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
188979bdfe76SSatish Balay 
189079bdfe76SSatish Balay   /* build cache for off array entries formed */
189179bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
189290f02eecSBarry Smith   b->donotstash  = 0;
189379bdfe76SSatish Balay   b->colmap      = 0;
189479bdfe76SSatish Balay   b->garray      = 0;
189579bdfe76SSatish Balay   b->roworiented = 1;
189679bdfe76SSatish Balay 
189730793edcSSatish Balay   /* stuff used in block assembly */
189830793edcSSatish Balay   b->barray       = 0;
189930793edcSSatish Balay 
190079bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
190179bdfe76SSatish Balay   b->lvec         = 0;
190279bdfe76SSatish Balay   b->Mvctx        = 0;
190379bdfe76SSatish Balay 
190479bdfe76SSatish Balay   /* stuff for MatGetRow() */
190579bdfe76SSatish Balay   b->rowindices   = 0;
190679bdfe76SSatish Balay   b->rowvalues    = 0;
190779bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
190879bdfe76SSatish Balay 
1909a07cd24cSSatish Balay   /* hash table stuff */
1910a07cd24cSSatish Balay   b->ht          = 0;
19110bdbc534SSatish Balay   b->ht_size     = 0;
1912a07cd24cSSatish Balay 
191379bdfe76SSatish Balay   *A = B;
19143a40ed3dSBarry Smith   PetscFunctionReturn(0);
191579bdfe76SSatish Balay }
1916026e39d0SSatish Balay 
19175615d1e5SSatish Balay #undef __FUNC__
19185615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
19190ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
19200ac07820SSatish Balay {
19210ac07820SSatish Balay   Mat         mat;
19220ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
19230ac07820SSatish Balay   int         ierr, len=0, flg;
19240ac07820SSatish Balay 
1925d64ed03dSBarry Smith   PetscFunctionBegin;
19260ac07820SSatish Balay   *newmat       = 0;
1927d4bb536fSBarry Smith   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
19280ac07820SSatish Balay   PLogObjectCreate(mat);
19290ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
19300ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
19310ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
19320ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
19330ac07820SSatish Balay   mat->factor     = matin->factor;
19340ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
19350ac07820SSatish Balay 
19360ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
19370ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
19380ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
19390ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
19400ac07820SSatish Balay 
19410ac07820SSatish Balay   a->bs  = oldmat->bs;
19420ac07820SSatish Balay   a->bs2 = oldmat->bs2;
19430ac07820SSatish Balay   a->mbs = oldmat->mbs;
19440ac07820SSatish Balay   a->nbs = oldmat->nbs;
19450ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
19460ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
19470ac07820SSatish Balay 
19480ac07820SSatish Balay   a->rstart       = oldmat->rstart;
19490ac07820SSatish Balay   a->rend         = oldmat->rend;
19500ac07820SSatish Balay   a->cstart       = oldmat->cstart;
19510ac07820SSatish Balay   a->cend         = oldmat->cend;
19520ac07820SSatish Balay   a->size         = oldmat->size;
19530ac07820SSatish Balay   a->rank         = oldmat->rank;
1954e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
19550ac07820SSatish Balay   a->rowvalues    = 0;
19560ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
195730793edcSSatish Balay   a->barray       = 0;
19580ac07820SSatish Balay 
19590ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1960f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
19610ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
19620ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
19630ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
19640ac07820SSatish Balay   if (oldmat->colmap) {
19650ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
19660ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
19670ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
19680ac07820SSatish Balay   } else a->colmap = 0;
19690ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
19700ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
19710ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
19720ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
19730ac07820SSatish Balay   } else a->garray = 0;
19740ac07820SSatish Balay 
19750ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
19760ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
19770ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
19780ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
19790ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
19800ac07820SSatish Balay   PLogObjectParent(mat,a->A);
19810ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
19820ac07820SSatish Balay   PLogObjectParent(mat,a->B);
19830ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
19840ac07820SSatish Balay   if (flg) {
19850ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
19860ac07820SSatish Balay   }
19870ac07820SSatish Balay   *newmat = mat;
19883a40ed3dSBarry Smith   PetscFunctionReturn(0);
19890ac07820SSatish Balay }
199057b952d6SSatish Balay 
199157b952d6SSatish Balay #include "sys.h"
199257b952d6SSatish Balay 
19935615d1e5SSatish Balay #undef __FUNC__
19945615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
199557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
199657b952d6SSatish Balay {
199757b952d6SSatish Balay   Mat          A;
199857b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
199957b952d6SSatish Balay   Scalar       *vals,*buf;
200057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
200157b952d6SSatish Balay   MPI_Status   status;
2002cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
200357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
200457b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
200557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
200657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
200757b952d6SSatish Balay 
2008d64ed03dSBarry Smith   PetscFunctionBegin;
200957b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
201057b952d6SSatish Balay   bs2  = bs*bs;
201157b952d6SSatish Balay 
201257b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
201357b952d6SSatish Balay   if (!rank) {
201457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2015e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2016a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2017d64ed03dSBarry Smith     if (header[3] < 0) {
2018a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2019d64ed03dSBarry Smith     }
20206c5fab8fSBarry Smith   }
2021d64ed03dSBarry Smith 
2022ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
202357b952d6SSatish Balay   M = header[1]; N = header[2];
202457b952d6SSatish Balay 
2025a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
202657b952d6SSatish Balay 
202757b952d6SSatish Balay   /*
202857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
202957b952d6SSatish Balay      divisible by the blocksize
203057b952d6SSatish Balay   */
203157b952d6SSatish Balay   Mbs        = M/bs;
203257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
203357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
203457b952d6SSatish Balay   else                  Mbs++;
203557b952d6SSatish Balay   if (extra_rows &&!rank) {
2036b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
203757b952d6SSatish Balay   }
2038537820f0SBarry Smith 
203957b952d6SSatish Balay   /* determine ownership of all rows */
204057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
204157b952d6SSatish Balay   m   = mbs * bs;
2042cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2043cee3aa6bSSatish Balay   browners = rowners + size + 1;
2044ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
204557b952d6SSatish Balay   rowners[0] = 0;
2046cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2047cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
204857b952d6SSatish Balay   rstart = rowners[rank];
204957b952d6SSatish Balay   rend   = rowners[rank+1];
205057b952d6SSatish Balay 
205157b952d6SSatish Balay   /* distribute row lengths to all processors */
205257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
205357b952d6SSatish Balay   if (!rank) {
205457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2055e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
205657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
205757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2058cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2059ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
206057b952d6SSatish Balay     PetscFree(sndcounts);
2061d64ed03dSBarry Smith   } else {
2062ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
206357b952d6SSatish Balay   }
206457b952d6SSatish Balay 
206557b952d6SSatish Balay   if (!rank) {
206657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
206757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
206857b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
206957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
207057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
207157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
207257b952d6SSatish Balay       }
207357b952d6SSatish Balay     }
207457b952d6SSatish Balay     PetscFree(rowlengths);
207557b952d6SSatish Balay 
207657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
207757b952d6SSatish Balay     maxnz = 0;
207857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
207957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
208057b952d6SSatish Balay     }
208157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
208257b952d6SSatish Balay 
208357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
208457b952d6SSatish Balay     nz = procsnz[0];
208557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
208657b952d6SSatish Balay     mycols = ibuf;
2087cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2088e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2089cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2090cee3aa6bSSatish Balay 
209157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
209257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
209357b952d6SSatish Balay       nz   = procsnz[i];
2094e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2095ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
209657b952d6SSatish Balay     }
209757b952d6SSatish Balay     /* read in the stuff for the last proc */
209857b952d6SSatish Balay     if ( size != 1 ) {
209957b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2100e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
210157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2102ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
210357b952d6SSatish Balay     }
210457b952d6SSatish Balay     PetscFree(cols);
2105d64ed03dSBarry Smith   } else {
210657b952d6SSatish Balay     /* determine buffer space needed for message */
210757b952d6SSatish Balay     nz = 0;
210857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
210957b952d6SSatish Balay       nz += locrowlens[i];
211057b952d6SSatish Balay     }
211157b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
211257b952d6SSatish Balay     mycols = ibuf;
211357b952d6SSatish Balay     /* receive message of column indices*/
2114ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2115ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2116a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
211757b952d6SSatish Balay   }
211857b952d6SSatish Balay 
211957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2120cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2121cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
212257b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2123cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
212457b952d6SSatish Balay   masked1 = mask    + Mbs;
212557b952d6SSatish Balay   masked2 = masked1 + Mbs;
212657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
212757b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
212857b952d6SSatish Balay     dcount  = 0;
212957b952d6SSatish Balay     odcount = 0;
213057b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
213157b952d6SSatish Balay       kmax = locrowlens[rowcount];
213257b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
213357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
213457b952d6SSatish Balay         if (!mask[tmp]) {
213557b952d6SSatish Balay           mask[tmp] = 1;
213657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
213757b952d6SSatish Balay           else masked1[dcount++] = tmp;
213857b952d6SSatish Balay         }
213957b952d6SSatish Balay       }
214057b952d6SSatish Balay       rowcount++;
214157b952d6SSatish Balay     }
2142cee3aa6bSSatish Balay 
214357b952d6SSatish Balay     dlens[i]  = dcount;
214457b952d6SSatish Balay     odlens[i] = odcount;
2145cee3aa6bSSatish Balay 
214657b952d6SSatish Balay     /* zero out the mask elements we set */
214757b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
214857b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
214957b952d6SSatish Balay   }
2150cee3aa6bSSatish Balay 
215157b952d6SSatish Balay   /* create our matrix */
2152537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2153537820f0SBarry Smith          CHKERRQ(ierr);
215457b952d6SSatish Balay   A = *newmat;
21556d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
215657b952d6SSatish Balay 
215757b952d6SSatish Balay   if (!rank) {
215857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
215957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
216057b952d6SSatish Balay     nz = procsnz[0];
216157b952d6SSatish Balay     vals = buf;
2162cee3aa6bSSatish Balay     mycols = ibuf;
2163cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2164e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2165cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2166537820f0SBarry Smith 
216757b952d6SSatish Balay     /* insert into matrix */
216857b952d6SSatish Balay     jj      = rstart*bs;
216957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
217057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
217157b952d6SSatish Balay       mycols += locrowlens[i];
217257b952d6SSatish Balay       vals   += locrowlens[i];
217357b952d6SSatish Balay       jj++;
217457b952d6SSatish Balay     }
217557b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
217657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
217757b952d6SSatish Balay       nz   = procsnz[i];
217857b952d6SSatish Balay       vals = buf;
2179e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2180ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
218157b952d6SSatish Balay     }
218257b952d6SSatish Balay     /* the last proc */
218357b952d6SSatish Balay     if ( size != 1 ){
218457b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2185cee3aa6bSSatish Balay       vals = buf;
2186e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
218757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2188ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
218957b952d6SSatish Balay     }
219057b952d6SSatish Balay     PetscFree(procsnz);
2191d64ed03dSBarry Smith   } else {
219257b952d6SSatish Balay     /* receive numeric values */
219357b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
219457b952d6SSatish Balay 
219557b952d6SSatish Balay     /* receive message of values*/
219657b952d6SSatish Balay     vals   = buf;
2197cee3aa6bSSatish Balay     mycols = ibuf;
2198ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2199ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2200a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
220157b952d6SSatish Balay 
220257b952d6SSatish Balay     /* insert into matrix */
220357b952d6SSatish Balay     jj      = rstart*bs;
2204cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
220557b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
220657b952d6SSatish Balay       mycols += locrowlens[i];
220757b952d6SSatish Balay       vals   += locrowlens[i];
220857b952d6SSatish Balay       jj++;
220957b952d6SSatish Balay     }
221057b952d6SSatish Balay   }
221157b952d6SSatish Balay   PetscFree(locrowlens);
221257b952d6SSatish Balay   PetscFree(buf);
221357b952d6SSatish Balay   PetscFree(ibuf);
221457b952d6SSatish Balay   PetscFree(rowners);
221557b952d6SSatish Balay   PetscFree(dlens);
2216cee3aa6bSSatish Balay   PetscFree(mask);
22176d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
22186d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
22193a40ed3dSBarry Smith   PetscFunctionReturn(0);
222057b952d6SSatish Balay }
222157b952d6SSatish Balay 
222257b952d6SSatish Balay 
2223