xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f1af5d2ffeae1f5fc391a89939f4818e47770ae3)
1*f1af5d2fSBarry Smith /*$Id: mpibaij.c,v 1.182 1999/10/24 14:02:31 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
163b2fbd54SBarry Smith 
177fc3c18eSBarry Smith EXTERN_C_BEGIN
187fc3c18eSBarry Smith #undef __FUNC__
197fc3c18eSBarry Smith #define __FUNC__ "MatStoreValues_MPIBAIJ"
207fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
217fc3c18eSBarry Smith {
227fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
237fc3c18eSBarry Smith   int         ierr;
247fc3c18eSBarry Smith 
257fc3c18eSBarry Smith   PetscFunctionBegin;
267fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
277fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
287fc3c18eSBarry Smith   PetscFunctionReturn(0);
297fc3c18eSBarry Smith }
307fc3c18eSBarry Smith EXTERN_C_END
317fc3c18eSBarry Smith 
327fc3c18eSBarry Smith EXTERN_C_BEGIN
337fc3c18eSBarry Smith #undef __FUNC__
347fc3c18eSBarry Smith #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
357fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
367fc3c18eSBarry Smith {
377fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
387fc3c18eSBarry Smith   int         ierr;
397fc3c18eSBarry Smith 
407fc3c18eSBarry Smith   PetscFunctionBegin;
417fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
427fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
437fc3c18eSBarry Smith   PetscFunctionReturn(0);
447fc3c18eSBarry Smith }
457fc3c18eSBarry Smith EXTERN_C_END
467fc3c18eSBarry Smith 
47537820f0SBarry Smith /*
48537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
4957b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
5057b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
5157b952d6SSatish Balay    length of colmap equals the global matrix length.
5257b952d6SSatish Balay */
535615d1e5SSatish Balay #undef __FUNC__
545615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
5557b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
5657b952d6SSatish Balay {
5757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5857b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
59dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
6057b952d6SSatish Balay 
61d64ed03dSBarry Smith   PetscFunctionBegin;
62aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
630f5bd95cSBarry Smith   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
6448e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
650f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
6648e59246SSatish Balay   }
6748e59246SSatish Balay #else
68758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
6957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
70549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
71928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
7248e59246SSatish Balay #endif
733a40ed3dSBarry Smith   PetscFunctionReturn(0);
7457b952d6SSatish Balay }
7557b952d6SSatish Balay 
7680c1aa95SSatish Balay #define CHUNKSIZE  10
7780c1aa95SSatish Balay 
78f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7980c1aa95SSatish Balay { \
8080c1aa95SSatish Balay  \
8180c1aa95SSatish Balay     brow = row/bs;  \
8280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
8480c1aa95SSatish Balay       bcol = col/bs; \
8580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
86ab26458aSBarry Smith       low = 0; high = nrow; \
87ab26458aSBarry Smith       while (high-low > 3) { \
88ab26458aSBarry Smith         t = (low+high)/2; \
89ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
90ab26458aSBarry Smith         else              low  = t; \
91ab26458aSBarry Smith       } \
92ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
9380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
9480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
9580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
97eada6651SSatish Balay           else                    *bap  = value;  \
98ac7a638eSSatish Balay           goto a_noinsert; \
9980c1aa95SSatish Balay         } \
10080c1aa95SSatish Balay       } \
10189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
102a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
10380c1aa95SSatish Balay       if (nrow >= rmax) { \
10480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
10580c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10680c1aa95SSatish Balay         Scalar *new_a; \
10780c1aa95SSatish Balay  \
108a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
10989280ab3SLois Curfman McInnes  \
11080c1aa95SSatish Balay         /* malloc new storage space */ \
11180c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
11280c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
11380c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
11480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
11580c1aa95SSatish Balay  \
11680c1aa95SSatish Balay         /* copy over old data into new slots */ \
11780c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11880c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
12080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
122549d3d68SSatish Balay                           len*sizeof(int));CHKERRQ(ierr); \
123549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
124549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
125549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
126549d3d68SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
12780c1aa95SSatish Balay         /* free up old matrix storage */ \
128606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
129606d414cSSatish Balay         if (!a->singlemalloc) { \
130606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
131606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
132606d414cSSatish Balay         } \
13380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
13480c1aa95SSatish Balay         a->singlemalloc = 1; \
13580c1aa95SSatish Balay  \
13680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
137ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
13880c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
14080c1aa95SSatish Balay         a->reallocs++; \
14180c1aa95SSatish Balay         a->nz++; \
14280c1aa95SSatish Balay       } \
14380c1aa95SSatish Balay       N = nrow++ - 1;  \
14480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
14580c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
14680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
147549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
14880c1aa95SSatish Balay       } \
149549d3d68SSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
15080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
15180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
152ac7a638eSSatish Balay       a_noinsert:; \
15380c1aa95SSatish Balay     ailen[brow] = nrow; \
15480c1aa95SSatish Balay }
15557b952d6SSatish Balay 
156ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
157ac7a638eSSatish Balay { \
158ac7a638eSSatish Balay  \
159ac7a638eSSatish Balay     brow = row/bs;  \
160ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
161ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
162ac7a638eSSatish Balay       bcol = col/bs; \
163ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
164ac7a638eSSatish Balay       low = 0; high = nrow; \
165ac7a638eSSatish Balay       while (high-low > 3) { \
166ac7a638eSSatish Balay         t = (low+high)/2; \
167ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
168ac7a638eSSatish Balay         else              low  = t; \
169ac7a638eSSatish Balay       } \
170ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
171ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
172ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
173ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
174ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
175ac7a638eSSatish Balay           else                    *bap  = value;  \
176ac7a638eSSatish Balay           goto b_noinsert; \
177ac7a638eSSatish Balay         } \
178ac7a638eSSatish Balay       } \
17989280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
180a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
181ac7a638eSSatish Balay       if (nrow >= rmax) { \
182ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
18374c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
184ac7a638eSSatish Balay         Scalar *new_a; \
185ac7a638eSSatish Balay  \
186a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
18789280ab3SLois Curfman McInnes  \
188ac7a638eSSatish Balay         /* malloc new storage space */ \
18974c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
190ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
191ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
192ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
193ac7a638eSSatish Balay  \
194ac7a638eSSatish Balay         /* copy over old data into new slots */ \
195ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
19674c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
197549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
198ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
199549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
200549d3d68SSatish Balay                            len*sizeof(int));CHKERRQ(ierr); \
201549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
202549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
203549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
204549d3d68SSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
205ac7a638eSSatish Balay         /* free up old matrix storage */ \
206606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
207606d414cSSatish Balay         if (!b->singlemalloc) { \
208606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
209606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
210606d414cSSatish Balay         } \
21174c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
21274c639caSSatish Balay         b->singlemalloc = 1; \
213ac7a638eSSatish Balay  \
214ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
215ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
21674c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
21774c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
21874c639caSSatish Balay         b->reallocs++; \
21974c639caSSatish Balay         b->nz++; \
220ac7a638eSSatish Balay       } \
221ac7a638eSSatish Balay       N = nrow++ - 1;  \
222ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
223ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
224ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
225549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
226ac7a638eSSatish Balay       } \
227549d3d68SSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
228ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
229ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
230ac7a638eSSatish Balay       b_noinsert:; \
231ac7a638eSSatish Balay     bilen[brow] = nrow; \
232ac7a638eSSatish Balay }
233ac7a638eSSatish Balay 
2345615d1e5SSatish Balay #undef __FUNC__
2355615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
236ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
23757b952d6SSatish Balay {
23857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
23957b952d6SSatish Balay   Scalar      value;
2404fa0d573SSatish Balay   int         ierr,i,j,row,col;
2414fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2424fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2434fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
24457b952d6SSatish Balay 
245eada6651SSatish Balay   /* Some Variables required in the macro */
24680c1aa95SSatish Balay   Mat         A = baij->A;
24780c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
248ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
249ac7a638eSSatish Balay   Scalar      *aa=a->a;
250ac7a638eSSatish Balay 
251ac7a638eSSatish Balay   Mat         B = baij->B;
252ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
253ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
254ac7a638eSSatish Balay   Scalar      *ba=b->a;
255ac7a638eSSatish Balay 
256ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
257ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
258ac7a638eSSatish Balay   Scalar      *ap,*bap;
25980c1aa95SSatish Balay 
260d64ed03dSBarry Smith   PetscFunctionBegin;
26157b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2625ef9f2a5SBarry Smith     if (im[i] < 0) continue;
263aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
264a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
26757b952d6SSatish Balay       row = im[i] - rstart_orig;
26857b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
26957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
27057b952d6SSatish Balay           col = in[j] - cstart_orig;
27157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
272f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
27380c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27473959e64SBarry Smith         } else if (in[j] < 0) continue;
275aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
276a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
277639f9d9dSBarry Smith #endif
27857b952d6SSatish Balay         else {
27957b952d6SSatish Balay           if (mat->was_assembled) {
280905e6a2fSBarry Smith             if (!baij->colmap) {
281905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
282905e6a2fSBarry Smith             }
283aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2840f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
285fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
28648e59246SSatish Balay #else
287905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
28848e59246SSatish Balay #endif
28957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
29057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
29157b952d6SSatish Balay               col =  in[j];
2929bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2939bf004c3SSatish Balay               B = baij->B;
2949bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2959bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2969bf004c3SSatish Balay               ba=b->a;
29757b952d6SSatish Balay             }
298d64ed03dSBarry Smith           } else col = in[j];
29957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
300ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
301ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
30257b952d6SSatish Balay         }
30357b952d6SSatish Balay       }
304d64ed03dSBarry Smith     } else {
30590f02eecSBarry Smith       if ( !baij->donotstash) {
306ff2fd236SBarry Smith         if (roworiented) {
3078798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
308ff2fd236SBarry Smith         } else {
3098798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
31057b952d6SSatish Balay         }
31157b952d6SSatish Balay       }
31257b952d6SSatish Balay     }
31390f02eecSBarry Smith   }
3143a40ed3dSBarry Smith   PetscFunctionReturn(0);
31557b952d6SSatish Balay }
31657b952d6SSatish Balay 
317ab26458aSBarry Smith #undef __FUNC__
318ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
319ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
320ab26458aSBarry Smith {
321ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
32230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
3237ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
324ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
325ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
326ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
327ab26458aSBarry Smith 
328b16ae2b1SBarry Smith   PetscFunctionBegin;
32930793edcSSatish Balay   if(!barray) {
33047513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
33130793edcSSatish Balay   }
33230793edcSSatish Balay 
333ab26458aSBarry Smith   if (roworiented) {
334ab26458aSBarry Smith     stepval = (n-1)*bs;
335ab26458aSBarry Smith   } else {
336ab26458aSBarry Smith     stepval = (m-1)*bs;
337ab26458aSBarry Smith   }
338ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3395ef9f2a5SBarry Smith     if (im[i] < 0) continue;
340aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3415ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
342ab26458aSBarry Smith #endif
343ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
344ab26458aSBarry Smith       row = im[i] - rstart;
345ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
34615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
34715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
34815b57d14SSatish Balay           barray = v + i*bs2;
34915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
35015b57d14SSatish Balay           barray = v + j*bs2;
35115b57d14SSatish Balay         } else { /* Here a copy is required */
352ab26458aSBarry Smith           if (roworiented) {
353ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
354ab26458aSBarry Smith           } else {
355ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
356abef11f7SSatish Balay           }
35747513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
35847513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
35930793edcSSatish Balay               *barray++  = *value++;
36047513183SBarry Smith             }
36147513183SBarry Smith           }
36230793edcSSatish Balay           barray -=bs2;
36315b57d14SSatish Balay         }
364abef11f7SSatish Balay 
365abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
366abef11f7SSatish Balay           col  = in[j] - cstart;
36730793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
368ab26458aSBarry Smith         }
3695ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
370aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3715ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
372ab26458aSBarry Smith #endif
373ab26458aSBarry Smith         else {
374ab26458aSBarry Smith           if (mat->was_assembled) {
375ab26458aSBarry Smith             if (!baij->colmap) {
376ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
377ab26458aSBarry Smith             }
378a5eb4965SSatish Balay 
379aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
380aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
381fa46199cSSatish Balay             { int data;
3820f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3830f5bd95cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
384fa46199cSSatish Balay             }
38548e59246SSatish Balay #else
3860f5bd95cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
387a5eb4965SSatish Balay #endif
38848e59246SSatish Balay #endif
389aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3900f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
391fa46199cSSatish Balay             col  = (col - 1)/bs;
39248e59246SSatish Balay #else
393a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
39448e59246SSatish Balay #endif
395ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
396ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
397ab26458aSBarry Smith               col =  in[j];
398ab26458aSBarry Smith             }
399ab26458aSBarry Smith           }
400ab26458aSBarry Smith           else col = in[j];
40130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
402ab26458aSBarry Smith         }
403ab26458aSBarry Smith       }
404d64ed03dSBarry Smith     } else {
405ab26458aSBarry Smith       if (!baij->donotstash) {
406ff2fd236SBarry Smith         if (roworiented) {
4078798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
408ff2fd236SBarry Smith         } else {
4098798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410ff2fd236SBarry Smith         }
411abef11f7SSatish Balay       }
412ab26458aSBarry Smith     }
413ab26458aSBarry Smith   }
4143a40ed3dSBarry Smith   PetscFunctionReturn(0);
415ab26458aSBarry Smith }
4160bdbc534SSatish Balay #define HASH_KEY 0.6180339887
417c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
418c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
419c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4205615d1e5SSatish Balay #undef __FUNC__
4210bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
4220bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4230bdbc534SSatish Balay {
4240bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4250bdbc534SSatish Balay   int         ierr,i,j,row,col;
4260bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
427c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
428c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
429c2760754SSatish Balay   double      tmp;
430b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
431aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4324a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4334a15367fSSatish Balay #endif
4340bdbc534SSatish Balay 
4350bdbc534SSatish Balay   PetscFunctionBegin;
4360bdbc534SSatish Balay 
4370bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
438aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4390bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4400bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4410bdbc534SSatish Balay #endif
4420bdbc534SSatish Balay       row = im[i];
443c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4440bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4450bdbc534SSatish Balay         col = in[j];
4460bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4470bdbc534SSatish Balay         /* Look up into the Hash Table */
448c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
449c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4500bdbc534SSatish Balay 
451c2760754SSatish Balay 
452c2760754SSatish Balay         idx = h1;
453aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
454187ce0cbSSatish Balay         insert_ct++;
455187ce0cbSSatish Balay         total_ct++;
456187ce0cbSSatish Balay         if (HT[idx] != key) {
457187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
458187ce0cbSSatish Balay           if (idx == size) {
459187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
460187ce0cbSSatish Balay             if (idx == h1) {
461187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
462187ce0cbSSatish Balay             }
463187ce0cbSSatish Balay           }
464187ce0cbSSatish Balay         }
465187ce0cbSSatish Balay #else
466c2760754SSatish Balay         if (HT[idx] != key) {
467c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
468c2760754SSatish Balay           if (idx == size) {
469c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
470c2760754SSatish Balay             if (idx == h1) {
471c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
472c2760754SSatish Balay             }
473c2760754SSatish Balay           }
474c2760754SSatish Balay         }
475187ce0cbSSatish Balay #endif
476c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
477c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
478c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4790bdbc534SSatish Balay       }
4800bdbc534SSatish Balay     } else {
4810bdbc534SSatish Balay       if (!baij->donotstash) {
482ff2fd236SBarry Smith         if (roworiented) {
4838798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
484ff2fd236SBarry Smith         } else {
4858798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4860bdbc534SSatish Balay         }
4870bdbc534SSatish Balay       }
4880bdbc534SSatish Balay     }
4890bdbc534SSatish Balay   }
490aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
491187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
492187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
493187ce0cbSSatish Balay #endif
4940bdbc534SSatish Balay   PetscFunctionReturn(0);
4950bdbc534SSatish Balay }
4960bdbc534SSatish Balay 
4970bdbc534SSatish Balay #undef __FUNC__
4980bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4990bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5000bdbc534SSatish Balay {
5010bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5028798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
5030bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
504b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
505c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
506c2760754SSatish Balay   double      tmp;
507187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
508aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5094a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5104a15367fSSatish Balay #endif
5110bdbc534SSatish Balay 
512d0a41580SSatish Balay   PetscFunctionBegin;
513d0a41580SSatish Balay 
5140bdbc534SSatish Balay   if (roworiented) {
5150bdbc534SSatish Balay     stepval = (n-1)*bs;
5160bdbc534SSatish Balay   } else {
5170bdbc534SSatish Balay     stepval = (m-1)*bs;
5180bdbc534SSatish Balay   }
5190bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
520aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5210bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5220bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5230bdbc534SSatish Balay #endif
5240bdbc534SSatish Balay     row   = im[i];
525187ce0cbSSatish Balay     v_t   = v + i*bs2;
526c2760754SSatish Balay     if (row >= rstart && row < rend) {
5270bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
5280bdbc534SSatish Balay         col = in[j];
5290bdbc534SSatish Balay 
5300bdbc534SSatish Balay         /* Look up into the Hash Table */
531c2760754SSatish Balay         key = row*Nbs+col+1;
532c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5330bdbc534SSatish Balay 
534c2760754SSatish Balay         idx = h1;
535aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
536187ce0cbSSatish Balay         total_ct++;
537187ce0cbSSatish Balay         insert_ct++;
538187ce0cbSSatish Balay        if (HT[idx] != key) {
539187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
540187ce0cbSSatish Balay           if (idx == size) {
541187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
542187ce0cbSSatish Balay             if (idx == h1) {
543187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
544187ce0cbSSatish Balay             }
545187ce0cbSSatish Balay           }
546187ce0cbSSatish Balay         }
547187ce0cbSSatish Balay #else
548c2760754SSatish Balay         if (HT[idx] != key) {
549c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
550c2760754SSatish Balay           if (idx == size) {
551c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
552c2760754SSatish Balay             if (idx == h1) {
553c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
554c2760754SSatish Balay             }
555c2760754SSatish Balay           }
556c2760754SSatish Balay         }
557187ce0cbSSatish Balay #endif
558c2760754SSatish Balay         baij_a = HD[idx];
5590bdbc534SSatish Balay         if (roworiented) {
560c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
561187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
562187ce0cbSSatish Balay           value = v_t;
563187ce0cbSSatish Balay           v_t  += bs;
564fef45726SSatish Balay           if (addv == ADD_VALUES) {
565c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
566c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
567fef45726SSatish Balay                 baij_a[jj]  += *value++;
568b4cc0f5aSSatish Balay               }
569b4cc0f5aSSatish Balay             }
570fef45726SSatish Balay           } else {
571c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
572c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
573fef45726SSatish Balay                 baij_a[jj]  = *value++;
574fef45726SSatish Balay               }
575fef45726SSatish Balay             }
576fef45726SSatish Balay           }
5770bdbc534SSatish Balay         } else {
5780bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
579fef45726SSatish Balay           if (addv == ADD_VALUES) {
580b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5810bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
582fef45726SSatish Balay                 baij_a[jj]  += *value++;
583fef45726SSatish Balay               }
584fef45726SSatish Balay             }
585fef45726SSatish Balay           } else {
586fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
587fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
588fef45726SSatish Balay                 baij_a[jj]  = *value++;
589fef45726SSatish Balay               }
590b4cc0f5aSSatish Balay             }
5910bdbc534SSatish Balay           }
5920bdbc534SSatish Balay         }
5930bdbc534SSatish Balay       }
5940bdbc534SSatish Balay     } else {
5950bdbc534SSatish Balay       if (!baij->donotstash) {
5960bdbc534SSatish Balay         if (roworiented) {
5978798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5980bdbc534SSatish Balay         } else {
5998798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6000bdbc534SSatish Balay         }
6010bdbc534SSatish Balay       }
6020bdbc534SSatish Balay     }
6030bdbc534SSatish Balay   }
604aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
605187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
606187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
607187ce0cbSSatish Balay #endif
6080bdbc534SSatish Balay   PetscFunctionReturn(0);
6090bdbc534SSatish Balay }
610133cdb44SSatish Balay 
6110bdbc534SSatish Balay #undef __FUNC__
6125615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
613ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
614d6de1c52SSatish Balay {
615d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
616d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
61748e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
618d6de1c52SSatish Balay 
619133cdb44SSatish Balay   PetscFunctionBegin;
620d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
621a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
622a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
623d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
624d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
625d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
626a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
627a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
628d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
629d6de1c52SSatish Balay           col = idxn[j] - bscstart;
63098dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631d64ed03dSBarry Smith         } else {
632905e6a2fSBarry Smith           if (!baij->colmap) {
633905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
634905e6a2fSBarry Smith           }
635aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
6360f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
637fa46199cSSatish Balay           data --;
63848e59246SSatish Balay #else
63948e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64048e59246SSatish Balay #endif
64148e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
642d9d09a02SSatish Balay           else {
64348e59246SSatish Balay             col  = data + idxn[j]%bs;
64498dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
645d6de1c52SSatish Balay           }
646d6de1c52SSatish Balay         }
647d6de1c52SSatish Balay       }
648d64ed03dSBarry Smith     } else {
649a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
650d6de1c52SSatish Balay     }
651d6de1c52SSatish Balay   }
6523a40ed3dSBarry Smith  PetscFunctionReturn(0);
653d6de1c52SSatish Balay }
654d6de1c52SSatish Balay 
6555615d1e5SSatish Balay #undef __FUNC__
6565615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
657ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
658d6de1c52SSatish Balay {
659d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
660d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
661acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
662d6de1c52SSatish Balay   double     sum = 0.0;
663d6de1c52SSatish Balay   Scalar     *v;
664d6de1c52SSatish Balay 
665d64ed03dSBarry Smith   PetscFunctionBegin;
666d6de1c52SSatish Balay   if (baij->size == 1) {
667d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
668d6de1c52SSatish Balay   } else {
669d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
670d6de1c52SSatish Balay       v = amat->a;
671d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
672aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
673e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
674d6de1c52SSatish Balay #else
675d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
676d6de1c52SSatish Balay #endif
677d6de1c52SSatish Balay       }
678d6de1c52SSatish Balay       v = bmat->a;
679d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
680aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
681e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
682d6de1c52SSatish Balay #else
683d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
684d6de1c52SSatish Balay #endif
685d6de1c52SSatish Balay       }
686ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
687d6de1c52SSatish Balay       *norm = sqrt(*norm);
688d64ed03dSBarry Smith     } else {
689e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
690d6de1c52SSatish Balay     }
691d64ed03dSBarry Smith   }
6923a40ed3dSBarry Smith   PetscFunctionReturn(0);
693d6de1c52SSatish Balay }
69457b952d6SSatish Balay 
695bd7f49f5SSatish Balay 
696fef45726SSatish Balay /*
697fef45726SSatish Balay   Creates the hash table, and sets the table
698fef45726SSatish Balay   This table is created only once.
699fef45726SSatish Balay   If new entried need to be added to the matrix
700fef45726SSatish Balay   then the hash table has to be destroyed and
701fef45726SSatish Balay   recreated.
702fef45726SSatish Balay */
703fef45726SSatish Balay #undef __FUNC__
704fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
705d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
706596b8d2eSBarry Smith {
707596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
708596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
709596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7100bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
711549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
712187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
713fef45726SSatish Balay   int         *HT,key;
7140bdbc534SSatish Balay   Scalar      **HD;
715c2760754SSatish Balay   double      tmp;
716aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
7174a15367fSSatish Balay   int         ct=0,max=0;
7184a15367fSSatish Balay #endif
719fef45726SSatish Balay 
720d64ed03dSBarry Smith   PetscFunctionBegin;
7210bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7220bdbc534SSatish Balay   size = baij->ht_size;
723fef45726SSatish Balay 
7240bdbc534SSatish Balay   if (baij->ht) {
7250bdbc534SSatish Balay     PetscFunctionReturn(0);
726596b8d2eSBarry Smith   }
7270bdbc534SSatish Balay 
728fef45726SSatish Balay   /* Allocate Memory for Hash Table */
729b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
730b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
731b9e4cc15SSatish Balay   HD = baij->hd;
732a07cd24cSSatish Balay   HT = baij->ht;
733b9e4cc15SSatish Balay 
734b9e4cc15SSatish Balay 
735549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
7360bdbc534SSatish Balay 
737596b8d2eSBarry Smith 
738596b8d2eSBarry Smith   /* Loop Over A */
7390bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
740596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7410bdbc534SSatish Balay       row = i+rstart;
7420bdbc534SSatish Balay       col = aj[j]+cstart;
743596b8d2eSBarry Smith 
744187ce0cbSSatish Balay       key = row*Nbs + col + 1;
745c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7460bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7470bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7480bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7490bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
750596b8d2eSBarry Smith           break;
751aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
752187ce0cbSSatish Balay         } else {
753187ce0cbSSatish Balay           ct++;
754187ce0cbSSatish Balay #endif
755596b8d2eSBarry Smith         }
756187ce0cbSSatish Balay       }
757aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
758187ce0cbSSatish Balay       if (k> max) max = k;
759187ce0cbSSatish Balay #endif
760596b8d2eSBarry Smith     }
761596b8d2eSBarry Smith   }
762596b8d2eSBarry Smith   /* Loop Over B */
7630bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
764596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7650bdbc534SSatish Balay       row = i+rstart;
7660bdbc534SSatish Balay       col = garray[bj[j]];
767187ce0cbSSatish Balay       key = row*Nbs + col + 1;
768c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7690bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7700bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7710bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7720bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
773596b8d2eSBarry Smith           break;
774aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
775187ce0cbSSatish Balay         } else {
776187ce0cbSSatish Balay           ct++;
777187ce0cbSSatish Balay #endif
778596b8d2eSBarry Smith         }
779187ce0cbSSatish Balay       }
780aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
781187ce0cbSSatish Balay       if (k> max) max = k;
782187ce0cbSSatish Balay #endif
783596b8d2eSBarry Smith     }
784596b8d2eSBarry Smith   }
785596b8d2eSBarry Smith 
786596b8d2eSBarry Smith   /* Print Summary */
787aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
788c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
789596b8d2eSBarry Smith     if (HT[i]) {j++;}
790187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
791187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
792187ce0cbSSatish Balay #endif
7933a40ed3dSBarry Smith   PetscFunctionReturn(0);
794596b8d2eSBarry Smith }
79557b952d6SSatish Balay 
796bbb85fb3SSatish Balay #undef __FUNC__
797bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
798bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799bbb85fb3SSatish Balay {
800bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
802bbb85fb3SSatish Balay   InsertMode  addv;
803bbb85fb3SSatish Balay 
804bbb85fb3SSatish Balay   PetscFunctionBegin;
805bbb85fb3SSatish Balay   if (baij->donotstash) {
806bbb85fb3SSatish Balay     PetscFunctionReturn(0);
807bbb85fb3SSatish Balay   }
808bbb85fb3SSatish Balay 
809bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
810bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
811bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
812bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
813bbb85fb3SSatish Balay   }
814bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
815bbb85fb3SSatish Balay 
8168798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
8178798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
8188798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8195a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
8208798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8215a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822bbb85fb3SSatish Balay   PetscFunctionReturn(0);
823bbb85fb3SSatish Balay }
824bbb85fb3SSatish Balay 
825bbb85fb3SSatish Balay #undef __FUNC__
826bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
827bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
828bbb85fb3SSatish Balay {
829bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
830a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
831a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
832a2d1c673SSatish Balay   int         *row,*col,other_disassembled,r1,r2,r3;
833bbb85fb3SSatish Balay   Scalar      *val;
834bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
835bbb85fb3SSatish Balay 
836bbb85fb3SSatish Balay   PetscFunctionBegin;
837bbb85fb3SSatish Balay   if (!baij->donotstash) {
838a2d1c673SSatish Balay     while (1) {
8398798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
840a2d1c673SSatish Balay       if (!flg) break;
841a2d1c673SSatish Balay 
842bbb85fb3SSatish Balay       for ( i=0; i<n; ) {
843bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
844bbb85fb3SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
845bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
846bbb85fb3SSatish Balay         else       ncols = n-i;
847bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
848bbb85fb3SSatish Balay         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849bbb85fb3SSatish Balay         i = j;
850bbb85fb3SSatish Balay       }
851bbb85fb3SSatish Balay     }
8528798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
854a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
855a2d1c673SSatish Balay        restore the original flags */
856a2d1c673SSatish Balay     r1 = baij->roworiented;
857a2d1c673SSatish Balay     r2 = a->roworiented;
858a2d1c673SSatish Balay     r3 = b->roworiented;
859a2d1c673SSatish Balay     baij->roworiented = 0;
860a2d1c673SSatish Balay     a->roworiented    = 0;
861a2d1c673SSatish Balay     b->roworiented    = 0;
862a2d1c673SSatish Balay     while (1) {
8638798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
864a2d1c673SSatish Balay       if (!flg) break;
865a2d1c673SSatish Balay 
866a2d1c673SSatish Balay       for ( i=0; i<n; ) {
867a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
868a2d1c673SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
869a2d1c673SSatish Balay         if (j < n) ncols = j-i;
870a2d1c673SSatish Balay         else       ncols = n-i;
871a2d1c673SSatish Balay         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
872a2d1c673SSatish Balay         i = j;
873a2d1c673SSatish Balay       }
874a2d1c673SSatish Balay     }
8758798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
876a2d1c673SSatish Balay     baij->roworiented = r1;
877a2d1c673SSatish Balay     a->roworiented    = r2;
878a2d1c673SSatish Balay     b->roworiented    = r3;
879bbb85fb3SSatish Balay   }
880bbb85fb3SSatish Balay 
881bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
882bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
883bbb85fb3SSatish Balay 
884bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
885bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
886bbb85fb3SSatish Balay   /*
887bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
888bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
889bbb85fb3SSatish Balay   */
890bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
891bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
892bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
893bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
894bbb85fb3SSatish Balay     }
895bbb85fb3SSatish Balay   }
896bbb85fb3SSatish Balay 
897bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
898bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
899bbb85fb3SSatish Balay   }
900bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
901bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
902bbb85fb3SSatish Balay 
903aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
904bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
905bbb85fb3SSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
906bbb85fb3SSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
907bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
908bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
909bbb85fb3SSatish Balay   }
910bbb85fb3SSatish Balay #endif
911bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
912bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
913bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
914bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
915bbb85fb3SSatish Balay   }
916bbb85fb3SSatish Balay 
917606d414cSSatish Balay   if (baij->rowvalues) {
918606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
919606d414cSSatish Balay     baij->rowvalues = 0;
920606d414cSSatish Balay   }
921bbb85fb3SSatish Balay   PetscFunctionReturn(0);
922bbb85fb3SSatish Balay }
92357b952d6SSatish Balay 
924225e174dSSatish Balay /*
9255615d1e5SSatish Balay #undef __FUNC__
9265615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
92757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
92857b952d6SSatish Balay {
92957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
93057b952d6SSatish Balay   int          ierr;
93157b952d6SSatish Balay 
932d64ed03dSBarry Smith   PetscFunctionBegin;
93357b952d6SSatish Balay   if (baij->size == 1) {
93457b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
935a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9363a40ed3dSBarry Smith   PetscFunctionReturn(0);
93757b952d6SSatish Balay }
938225e174dSSatish Balay */
93957b952d6SSatish Balay 
9405615d1e5SSatish Balay #undef __FUNC__
9417b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
9427b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
94357b952d6SSatish Balay {
94457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
94577ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
9466831982aSBarry Smith   PetscTruth   isascii,isdraw;
94757b952d6SSatish Balay 
948d64ed03dSBarry Smith   PetscFunctionBegin;
9496831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
9506831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
9510f5bd95cSBarry Smith   if (isascii) {
952d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
953639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9544e220ebcSLois Curfman McInnes       MatInfo info;
955d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
956d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9576831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9584e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
9596831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
960d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
9616831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
962d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
9636831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
9646831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
96557b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
9663a40ed3dSBarry Smith       PetscFunctionReturn(0);
967d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
9686831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
9693a40ed3dSBarry Smith       PetscFunctionReturn(0);
97057b952d6SSatish Balay     }
97157b952d6SSatish Balay   }
97257b952d6SSatish Balay 
9730f5bd95cSBarry Smith   if (isdraw) {
97457b952d6SSatish Balay     Draw       draw;
97557b952d6SSatish Balay     PetscTruth isnull;
97677ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
9773a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
97857b952d6SSatish Balay   }
97957b952d6SSatish Balay 
98057b952d6SSatish Balay   if (size == 1) {
98157b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
982d64ed03dSBarry Smith   } else {
98357b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
98457b952d6SSatish Balay     Mat         A;
98557b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
98640011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
98757b952d6SSatish Balay     int         mbs = baij->mbs;
98857b952d6SSatish Balay     Scalar      *a;
98957b952d6SSatish Balay 
99057b952d6SSatish Balay     if (!rank) {
99155843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
992d64ed03dSBarry Smith     } else {
99355843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
99457b952d6SSatish Balay     }
99557b952d6SSatish Balay     PLogObjectParent(mat,A);
99657b952d6SSatish Balay 
99757b952d6SSatish Balay     /* copy over the A part */
99857b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*) baij->A->data;
99957b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
100057b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
100157b952d6SSatish Balay 
100257b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
100357b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
100457b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
100557b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
100657b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
100757b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1008cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1009cee3aa6bSSatish Balay           col++; a += bs;
101057b952d6SSatish Balay         }
101157b952d6SSatish Balay       }
101257b952d6SSatish Balay     }
101357b952d6SSatish Balay     /* copy over the B part */
101457b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
101557b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
101657b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
101757b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
101857b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
101957b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
102057b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
102157b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1022cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1023cee3aa6bSSatish Balay           col++; a += bs;
102457b952d6SSatish Balay         }
102557b952d6SSatish Balay       }
102657b952d6SSatish Balay     }
1027606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10286d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10296d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
103055843e3eSBarry Smith     /*
103155843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
103255843e3eSBarry Smith        synchronized across all processors that share the Draw object
103355843e3eSBarry Smith     */
10346831982aSBarry Smith     if (!rank) {
10356831982aSBarry Smith       Viewer sviewer;
10366831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
10376831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
10386831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
103957b952d6SSatish Balay     }
104057b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
104157b952d6SSatish Balay   }
10423a40ed3dSBarry Smith   PetscFunctionReturn(0);
104357b952d6SSatish Balay }
104457b952d6SSatish Balay 
104557b952d6SSatish Balay 
104657b952d6SSatish Balay 
10475615d1e5SSatish Balay #undef __FUNC__
10485615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1049e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
105057b952d6SSatish Balay {
105157b952d6SSatish Balay   int        ierr;
10526831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
105357b952d6SSatish Balay 
1054d64ed03dSBarry Smith   PetscFunctionBegin;
10556831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
10566831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10576831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
10586831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
10590f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
10607b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
10615cd90555SBarry Smith   } else {
10620f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
106357b952d6SSatish Balay   }
10643a40ed3dSBarry Smith   PetscFunctionReturn(0);
106557b952d6SSatish Balay }
106657b952d6SSatish Balay 
10675615d1e5SSatish Balay #undef __FUNC__
10685615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1069e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
107079bdfe76SSatish Balay {
107179bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
107279bdfe76SSatish Balay   int         ierr;
107379bdfe76SSatish Balay 
1074d64ed03dSBarry Smith   PetscFunctionBegin;
107598dd23e9SBarry Smith 
107698dd23e9SBarry Smith   if (mat->mapping) {
107798dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
107898dd23e9SBarry Smith   }
107998dd23e9SBarry Smith   if (mat->bmapping) {
108098dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
108198dd23e9SBarry Smith   }
108261b13de0SBarry Smith   if (mat->rmap) {
108361b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
108461b13de0SBarry Smith   }
108561b13de0SBarry Smith   if (mat->cmap) {
108661b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
108761b13de0SBarry Smith   }
1088aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1089e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
109079bdfe76SSatish Balay #endif
109179bdfe76SSatish Balay 
10928798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
10938798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
10948798bf22SSatish Balay 
1095606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
109679bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
109779bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1098aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
10990f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
110048e59246SSatish Balay #else
1101606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
110248e59246SSatish Balay #endif
1103606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1104606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1105606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1106606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1107606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1108606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1109606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
111079bdfe76SSatish Balay   PLogObjectDestroy(mat);
111179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11123a40ed3dSBarry Smith   PetscFunctionReturn(0);
111379bdfe76SSatish Balay }
111479bdfe76SSatish Balay 
11155615d1e5SSatish Balay #undef __FUNC__
11165615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1117ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1118cee3aa6bSSatish Balay {
1119cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
112047b4a8eaSLois Curfman McInnes   int         ierr, nt;
1121cee3aa6bSSatish Balay 
1122d64ed03dSBarry Smith   PetscFunctionBegin;
1123e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
112447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1125a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
112647b4a8eaSLois Curfman McInnes   }
1127e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
112847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1129a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
113047b4a8eaSLois Curfman McInnes   }
113143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
113343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
113543a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11363a40ed3dSBarry Smith   PetscFunctionReturn(0);
1137cee3aa6bSSatish Balay }
1138cee3aa6bSSatish Balay 
11395615d1e5SSatish Balay #undef __FUNC__
11405615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1141ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1142cee3aa6bSSatish Balay {
1143cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1144cee3aa6bSSatish Balay   int        ierr;
1145d64ed03dSBarry Smith 
1146d64ed03dSBarry Smith   PetscFunctionBegin;
114743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1148f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
114943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1150f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
11513a40ed3dSBarry Smith   PetscFunctionReturn(0);
1152cee3aa6bSSatish Balay }
1153cee3aa6bSSatish Balay 
11545615d1e5SSatish Balay #undef __FUNC__
11555615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1156ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1157cee3aa6bSSatish Balay {
1158cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1159cee3aa6bSSatish Balay   int         ierr;
1160cee3aa6bSSatish Balay 
1161d64ed03dSBarry Smith   PetscFunctionBegin;
1162cee3aa6bSSatish Balay   /* do nondiagonal part */
1163f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1164cee3aa6bSSatish Balay   /* send it on its way */
1165537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1166cee3aa6bSSatish Balay   /* do local part */
1167f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1168cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1169cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1170cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1171639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1173cee3aa6bSSatish Balay }
1174cee3aa6bSSatish Balay 
11755615d1e5SSatish Balay #undef __FUNC__
11765615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1177ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1178cee3aa6bSSatish Balay {
1179cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1180cee3aa6bSSatish Balay   int         ierr;
1181cee3aa6bSSatish Balay 
1182d64ed03dSBarry Smith   PetscFunctionBegin;
1183cee3aa6bSSatish Balay   /* do nondiagonal part */
1184f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1185cee3aa6bSSatish Balay   /* send it on its way */
1186537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1187cee3aa6bSSatish Balay   /* do local part */
1188f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1189cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1190cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1191cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1192537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1194cee3aa6bSSatish Balay }
1195cee3aa6bSSatish Balay 
1196cee3aa6bSSatish Balay /*
1197cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1198cee3aa6bSSatish Balay    diagonal block
1199cee3aa6bSSatish Balay */
12005615d1e5SSatish Balay #undef __FUNC__
12015615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1202ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1203cee3aa6bSSatish Balay {
1204cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
12053a40ed3dSBarry Smith   int         ierr;
1206d64ed03dSBarry Smith 
1207d64ed03dSBarry Smith   PetscFunctionBegin;
1208a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12093a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12103a40ed3dSBarry Smith   PetscFunctionReturn(0);
1211cee3aa6bSSatish Balay }
1212cee3aa6bSSatish Balay 
12135615d1e5SSatish Balay #undef __FUNC__
12145615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1215ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1216cee3aa6bSSatish Balay {
1217cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1218cee3aa6bSSatish Balay   int         ierr;
1219d64ed03dSBarry Smith 
1220d64ed03dSBarry Smith   PetscFunctionBegin;
1221cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1222cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
12233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1224cee3aa6bSSatish Balay }
1225026e39d0SSatish Balay 
12265615d1e5SSatish Balay #undef __FUNC__
12275615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1228ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
122957b952d6SSatish Balay {
123057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1231d64ed03dSBarry Smith 
1232d64ed03dSBarry Smith   PetscFunctionBegin;
1233bd7f49f5SSatish Balay   if (m) *m = mat->M;
1234bd7f49f5SSatish Balay   if (n) *n = mat->N;
12353a40ed3dSBarry Smith   PetscFunctionReturn(0);
123657b952d6SSatish Balay }
123757b952d6SSatish Balay 
12385615d1e5SSatish Balay #undef __FUNC__
12395615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1240ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
124157b952d6SSatish Balay {
124257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1243d64ed03dSBarry Smith 
1244d64ed03dSBarry Smith   PetscFunctionBegin;
1245f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12463a40ed3dSBarry Smith   PetscFunctionReturn(0);
124757b952d6SSatish Balay }
124857b952d6SSatish Balay 
12495615d1e5SSatish Balay #undef __FUNC__
12505615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1251ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
125257b952d6SSatish Balay {
125357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1254d64ed03dSBarry Smith 
1255d64ed03dSBarry Smith   PetscFunctionBegin;
125657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
12573a40ed3dSBarry Smith   PetscFunctionReturn(0);
125857b952d6SSatish Balay }
125957b952d6SSatish Balay 
12605615d1e5SSatish Balay #undef __FUNC__
12615615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1262acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1263acdf5bf4SSatish Balay {
1264acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1265acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1266acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1267d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1268d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1269acdf5bf4SSatish Balay 
1270d64ed03dSBarry Smith   PetscFunctionBegin;
1271a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1272acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1273acdf5bf4SSatish Balay 
1274acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1275acdf5bf4SSatish Balay     /*
1276acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1277acdf5bf4SSatish Balay     */
1278acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1279bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1280bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1281acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1282acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1283acdf5bf4SSatish Balay     }
1284549d3d68SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1285acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1286acdf5bf4SSatish Balay   }
1287acdf5bf4SSatish Balay 
1288a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1289d9d09a02SSatish Balay   lrow = row - brstart;
1290acdf5bf4SSatish Balay 
1291acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1292acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1293acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1294f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1295f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1296acdf5bf4SSatish Balay   nztot = nzA + nzB;
1297acdf5bf4SSatish Balay 
1298acdf5bf4SSatish Balay   cmap  = mat->garray;
1299acdf5bf4SSatish Balay   if (v  || idx) {
1300acdf5bf4SSatish Balay     if (nztot) {
1301acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1302acdf5bf4SSatish Balay       int imark = -1;
1303acdf5bf4SSatish Balay       if (v) {
1304acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1305acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1306d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1307acdf5bf4SSatish Balay           else break;
1308acdf5bf4SSatish Balay         }
1309acdf5bf4SSatish Balay         imark = i;
1310acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1311acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1312acdf5bf4SSatish Balay       }
1313acdf5bf4SSatish Balay       if (idx) {
1314acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1315acdf5bf4SSatish Balay         if (imark > -1) {
1316acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1317bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1318acdf5bf4SSatish Balay           }
1319acdf5bf4SSatish Balay         } else {
1320acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1321d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1322d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1323acdf5bf4SSatish Balay             else break;
1324acdf5bf4SSatish Balay           }
1325acdf5bf4SSatish Balay           imark = i;
1326acdf5bf4SSatish Balay         }
1327d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1328d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1329acdf5bf4SSatish Balay       }
1330d64ed03dSBarry Smith     } else {
1331d212a18eSSatish Balay       if (idx) *idx = 0;
1332d212a18eSSatish Balay       if (v)   *v   = 0;
1333d212a18eSSatish Balay     }
1334acdf5bf4SSatish Balay   }
1335acdf5bf4SSatish Balay   *nz = nztot;
1336f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1337f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
13383a40ed3dSBarry Smith   PetscFunctionReturn(0);
1339acdf5bf4SSatish Balay }
1340acdf5bf4SSatish Balay 
13415615d1e5SSatish Balay #undef __FUNC__
13425615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1343acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1344acdf5bf4SSatish Balay {
1345acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1346d64ed03dSBarry Smith 
1347d64ed03dSBarry Smith   PetscFunctionBegin;
1348acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1349a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1350acdf5bf4SSatish Balay   }
1351acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1353acdf5bf4SSatish Balay }
1354acdf5bf4SSatish Balay 
13555615d1e5SSatish Balay #undef __FUNC__
13565615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1357ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13585a838052SSatish Balay {
13595a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1360d64ed03dSBarry Smith 
1361d64ed03dSBarry Smith   PetscFunctionBegin;
13625a838052SSatish Balay   *bs = baij->bs;
13633a40ed3dSBarry Smith   PetscFunctionReturn(0);
13645a838052SSatish Balay }
13655a838052SSatish Balay 
13665615d1e5SSatish Balay #undef __FUNC__
13675615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1368ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
136958667388SSatish Balay {
137058667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
137158667388SSatish Balay   int         ierr;
1372d64ed03dSBarry Smith 
1373d64ed03dSBarry Smith   PetscFunctionBegin;
137458667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
137558667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
13763a40ed3dSBarry Smith   PetscFunctionReturn(0);
137758667388SSatish Balay }
13780ac07820SSatish Balay 
13795615d1e5SSatish Balay #undef __FUNC__
13805615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1381ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13820ac07820SSatish Balay {
13834e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13844e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13857d57db60SLois Curfman McInnes   int         ierr;
13867d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13870ac07820SSatish Balay 
1388d64ed03dSBarry Smith   PetscFunctionBegin;
13894e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13904e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
13910e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1392de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
13934e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
13940e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1395de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
13960ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13974e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13984e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13994e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14004e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14014e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14020ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1403f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14044e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14054e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14064e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14074e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14084e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1410f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14114e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14124e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14134e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14144e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14154e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1416d41123aaSBarry Smith   } else {
1417d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
14180ac07820SSatish Balay   }
14195a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14205a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14215a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14225a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14234e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14244e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14254e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14263a40ed3dSBarry Smith   PetscFunctionReturn(0);
14270ac07820SSatish Balay }
14280ac07820SSatish Balay 
14295615d1e5SSatish Balay #undef __FUNC__
14305615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1431ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
143258667388SSatish Balay {
143358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
143498305bb5SBarry Smith   int         ierr;
143558667388SSatish Balay 
1436d64ed03dSBarry Smith   PetscFunctionBegin;
143758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
143858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14396da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1440c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
14414787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
14424787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
144398305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
144498305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1446aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
144798305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
144898305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1449b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14506da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
145158667388SSatish Balay              op == MAT_SYMMETRIC ||
145258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1453b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
145498305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
145558667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
145698305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
145758667388SSatish Balay     a->roworiented = 0;
145898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
145998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
14602b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
146190f02eecSBarry Smith     a->donotstash = 1;
1462d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1463d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1464133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1465133cdb44SSatish Balay     a->ht_flag = 1;
1466d64ed03dSBarry Smith   } else {
1467d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1468d64ed03dSBarry Smith   }
14693a40ed3dSBarry Smith   PetscFunctionReturn(0);
147058667388SSatish Balay }
147158667388SSatish Balay 
14725615d1e5SSatish Balay #undef __FUNC__
14735615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1474ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
14750ac07820SSatish Balay {
14760ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
14770ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14780ac07820SSatish Balay   Mat        B;
147940011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
14800ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
14810ac07820SSatish Balay   Scalar     *a;
14820ac07820SSatish Balay 
1483d64ed03dSBarry Smith   PetscFunctionBegin;
1484a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
148528937c7bSBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14860ac07820SSatish Balay CHKERRQ(ierr);
14870ac07820SSatish Balay 
14880ac07820SSatish Balay   /* copy over the A part */
14890ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14900ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14910ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
14920ac07820SSatish Balay 
14930ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14940ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14950ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14960ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14970ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14980ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14990ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15000ac07820SSatish Balay         col++; a += bs;
15010ac07820SSatish Balay       }
15020ac07820SSatish Balay     }
15030ac07820SSatish Balay   }
15040ac07820SSatish Balay   /* copy over the B part */
15050ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
15060ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15070ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15080ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15090ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15100ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15110ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15120ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15130ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15140ac07820SSatish Balay         col++; a += bs;
15150ac07820SSatish Balay       }
15160ac07820SSatish Balay     }
15170ac07820SSatish Balay   }
1518606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15190ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15200ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15210ac07820SSatish Balay 
15220ac07820SSatish Balay   if (matout != PETSC_NULL) {
15230ac07820SSatish Balay     *matout = B;
15240ac07820SSatish Balay   } else {
1525f830108cSBarry Smith     PetscOps *Abops;
1526cc2dc46cSBarry Smith     MatOps   Aops;
1527f830108cSBarry Smith 
15280ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1529606d414cSSatish Balay     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
15300ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
15310ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1532aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
15330f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1534b1fc9764SSatish Balay #else
1535606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1536b1fc9764SSatish Balay #endif
1537606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1538606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1539606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1540606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1541f830108cSBarry Smith 
1542f830108cSBarry Smith     /*
1543f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1544f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1545f830108cSBarry Smith       else from C.
1546f830108cSBarry Smith     */
1547f830108cSBarry Smith     Abops   = A->bops;
1548f830108cSBarry Smith     Aops    = A->ops;
1549549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1550f830108cSBarry Smith     A->bops = Abops;
1551f830108cSBarry Smith     A->ops  = Aops;
1552f830108cSBarry Smith 
15530ac07820SSatish Balay     PetscHeaderDestroy(B);
15540ac07820SSatish Balay   }
15553a40ed3dSBarry Smith   PetscFunctionReturn(0);
15560ac07820SSatish Balay }
15570e95ebc0SSatish Balay 
15585615d1e5SSatish Balay #undef __FUNC__
15595615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
156036c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15610e95ebc0SSatish Balay {
156236c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
156336c4a09eSSatish Balay   Mat         a = baij->A, b = baij->B;
15640e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
15650e95ebc0SSatish Balay 
1566d64ed03dSBarry Smith   PetscFunctionBegin;
156736c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
156836c4a09eSSatish Balay   if (rr) {
156936c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
157036c4a09eSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
157136c4a09eSSatish Balay     /* Overlap communication with computation. */
157236c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
157336c4a09eSSatish Balay   }
15740e95ebc0SSatish Balay   if (ll) {
15750e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
157636c4a09eSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
157736c4a09eSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
15780e95ebc0SSatish Balay   }
157936c4a09eSSatish Balay   /* scale  the diagonal block */
158036c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
158136c4a09eSSatish Balay 
158236c4a09eSSatish Balay   if (rr) {
158336c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
158436c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
158536c4a09eSSatish Balay     ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
158636c4a09eSSatish Balay   }
158736c4a09eSSatish Balay 
15883a40ed3dSBarry Smith   PetscFunctionReturn(0);
15890e95ebc0SSatish Balay }
15900e95ebc0SSatish Balay 
15915615d1e5SSatish Balay #undef __FUNC__
15925615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1593ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
15940ac07820SSatish Balay {
15950ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
15960ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1597a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
15980ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
15990ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1600a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16010ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16020ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16030ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16040ac07820SSatish Balay   IS             istmp;
16050ac07820SSatish Balay 
1606d64ed03dSBarry Smith   PetscFunctionBegin;
16070ac07820SSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
16080ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16090ac07820SSatish Balay 
16100ac07820SSatish Balay   /*  first count number of contributors to each processor */
16110ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1612549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1613549d3d68SSatish Balay   procs  = nprocs + size;
16140ac07820SSatish Balay   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
16150ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16160ac07820SSatish Balay     idx   = rows[i];
16170ac07820SSatish Balay     found = 0;
16180ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
16190ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16200ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16210ac07820SSatish Balay       }
16220ac07820SSatish Balay     }
1623a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16240ac07820SSatish Balay   }
16250ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
16260ac07820SSatish Balay 
16270ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16286831982aSBarry Smith   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
16296831982aSBarry Smith   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
16300ac07820SSatish Balay   nmax   = work[rank];
16316831982aSBarry Smith   nrecvs = work[size+rank];
1632606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
16330ac07820SSatish Balay 
16340ac07820SSatish Balay   /* post receives:   */
1635d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1636d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16370ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1638ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16390ac07820SSatish Balay   }
16400ac07820SSatish Balay 
16410ac07820SSatish Balay   /* do sends:
16420ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16430ac07820SSatish Balay      the ith processor
16440ac07820SSatish Balay   */
16450ac07820SSatish Balay   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1646ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16470ac07820SSatish Balay   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
16480ac07820SSatish Balay   starts[0]  = 0;
16490ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16500ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16510ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16520ac07820SSatish Balay   }
16536831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
16540ac07820SSatish Balay 
16550ac07820SSatish Balay   starts[0] = 0;
16560ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16570ac07820SSatish Balay   count = 0;
16580ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
16590ac07820SSatish Balay     if (procs[i]) {
1660ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
16610ac07820SSatish Balay     }
16620ac07820SSatish Balay   }
1663606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
16640ac07820SSatish Balay 
16650ac07820SSatish Balay   base = owners[rank]*bs;
16660ac07820SSatish Balay 
16670ac07820SSatish Balay   /*  wait on receives */
16680ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
16690ac07820SSatish Balay   source = lens + nrecvs;
16700ac07820SSatish Balay   count  = nrecvs; slen = 0;
16710ac07820SSatish Balay   while (count) {
1672ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
16730ac07820SSatish Balay     /* unpack receives into our local space */
1674ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
16750ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
16760ac07820SSatish Balay     lens[imdex]    = n;
16770ac07820SSatish Balay     slen          += n;
16780ac07820SSatish Balay     count--;
16790ac07820SSatish Balay   }
1680606d414cSSatish Balay   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
16810ac07820SSatish Balay 
16820ac07820SSatish Balay   /* move the data into the send scatter */
16830ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
16840ac07820SSatish Balay   count = 0;
16850ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
16860ac07820SSatish Balay     values = rvalues + i*nmax;
16870ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
16880ac07820SSatish Balay       lrows[count++] = values[j] - base;
16890ac07820SSatish Balay     }
16900ac07820SSatish Balay   }
1691606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1692606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1693606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1694606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
16950ac07820SSatish Balay 
16960ac07820SSatish Balay   /* actually zap the local rows */
1697029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
16980ac07820SSatish Balay   PLogObjectParent(A,istmp);
1699a07cd24cSSatish Balay 
170072dacd9aSBarry Smith   /*
170172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
170272dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
170372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
170472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
170572dacd9aSBarry Smith 
170672dacd9aSBarry Smith        Contributed by: Mathew Knepley
170772dacd9aSBarry Smith   */
17089c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17090ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
17109c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17119c957beeSSatish Balay     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
17129c957beeSSatish Balay   } else if (diag) {
17139c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1714fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1715fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1716fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17176525c446SSatish Balay     }
1718a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1719a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1720a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1721a07cd24cSSatish Balay     }
1722a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17249c957beeSSatish Balay   } else {
17259c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1726a07cd24cSSatish Balay   }
17279c957beeSSatish Balay 
17289c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1729606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1730a07cd24cSSatish Balay 
17310ac07820SSatish Balay   /* wait on sends */
17320ac07820SSatish Balay   if (nsends) {
1733d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1734ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1735606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
17360ac07820SSatish Balay   }
1737606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1738606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
17390ac07820SSatish Balay 
17403a40ed3dSBarry Smith   PetscFunctionReturn(0);
17410ac07820SSatish Balay }
174272dacd9aSBarry Smith 
17435615d1e5SSatish Balay #undef __FUNC__
17445615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1745ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1746ba4ca20aSSatish Balay {
1747ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
174825fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1749133cdb44SSatish Balay   static int  called = 0;
17503a40ed3dSBarry Smith   int         ierr;
1751ba4ca20aSSatish Balay 
1752d64ed03dSBarry Smith   PetscFunctionBegin;
17533a40ed3dSBarry Smith   if (!a->rank) {
17543a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
175525fdafccSSatish Balay   }
175625fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1757d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1758d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
17593a40ed3dSBarry Smith   PetscFunctionReturn(0);
1760ba4ca20aSSatish Balay }
17610ac07820SSatish Balay 
17625615d1e5SSatish Balay #undef __FUNC__
17635615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1764ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1765bb5a7306SBarry Smith {
1766bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1767bb5a7306SBarry Smith   int         ierr;
1768d64ed03dSBarry Smith 
1769d64ed03dSBarry Smith   PetscFunctionBegin;
1770bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
17713a40ed3dSBarry Smith   PetscFunctionReturn(0);
1772bb5a7306SBarry Smith }
1773bb5a7306SBarry Smith 
17742e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
17750ac07820SSatish Balay 
17767fc3c18eSBarry Smith #undef __FUNC__
17777fc3c18eSBarry Smith #define __FUNC__ "MatEqual_MPIBAIJ"
17787fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
17797fc3c18eSBarry Smith {
17807fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
17817fc3c18eSBarry Smith   Mat         a, b, c, d;
17827fc3c18eSBarry Smith   PetscTruth  flg;
17837fc3c18eSBarry Smith   int         ierr;
17847fc3c18eSBarry Smith 
17857fc3c18eSBarry Smith   PetscFunctionBegin;
17867fc3c18eSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
17877fc3c18eSBarry Smith   a = matA->A; b = matA->B;
17887fc3c18eSBarry Smith   c = matB->A; d = matB->B;
17897fc3c18eSBarry Smith 
17907fc3c18eSBarry Smith   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
17917fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
17927fc3c18eSBarry Smith     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
17937fc3c18eSBarry Smith   }
17947fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
17957fc3c18eSBarry Smith   PetscFunctionReturn(0);
17967fc3c18eSBarry Smith }
17977fc3c18eSBarry Smith 
179879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1799cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1800cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1801cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1802cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1803cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1804cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1805cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1806cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1807cc2dc46cSBarry Smith   0,
1808cc2dc46cSBarry Smith   0,
1809cc2dc46cSBarry Smith   0,
1810cc2dc46cSBarry Smith   0,
1811cc2dc46cSBarry Smith   0,
1812cc2dc46cSBarry Smith   0,
1813cc2dc46cSBarry Smith   0,
1814cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1815cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
18167fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1817cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1818cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1819cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1820cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1821cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1822cc2dc46cSBarry Smith   0,
1823cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1824cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1825cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1826cc2dc46cSBarry Smith   0,
1827cc2dc46cSBarry Smith   0,
1828cc2dc46cSBarry Smith   0,
1829cc2dc46cSBarry Smith   0,
1830cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1831cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1832cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1833cc2dc46cSBarry Smith   0,
1834cc2dc46cSBarry Smith   0,
1835cc2dc46cSBarry Smith   0,
1836cc2dc46cSBarry Smith   0,
18372e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   0,
1840cc2dc46cSBarry Smith   0,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   0,
1843cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1844cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1845cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1846cc2dc46cSBarry Smith   0,
1847cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1848cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1849cc2dc46cSBarry Smith   0,
1850cc2dc46cSBarry Smith   0,
1851cc2dc46cSBarry Smith   0,
1852cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1853cc2dc46cSBarry Smith   0,
1854cc2dc46cSBarry Smith   0,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   0,
1859cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1860cc2dc46cSBarry Smith   0,
1861cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1862cc2dc46cSBarry Smith   0,
1863cc2dc46cSBarry Smith   0,
1864cc2dc46cSBarry Smith   0,
1865cc2dc46cSBarry Smith   MatGetMaps_Petsc};
186679bdfe76SSatish Balay 
18675ef9f2a5SBarry Smith 
1868e18c124aSSatish Balay EXTERN_C_BEGIN
18695ef9f2a5SBarry Smith #undef __FUNC__
18705ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
18715ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
18725ef9f2a5SBarry Smith {
18735ef9f2a5SBarry Smith   PetscFunctionBegin;
18745ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
18755ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
18765ef9f2a5SBarry Smith   PetscFunctionReturn(0);
18775ef9f2a5SBarry Smith }
1878e18c124aSSatish Balay EXTERN_C_END
187979bdfe76SSatish Balay 
18805615d1e5SSatish Balay #undef __FUNC__
18815615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
188279bdfe76SSatish Balay /*@C
188379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
188479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
188579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
188679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
188779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
188879bdfe76SSatish Balay 
1889db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1890db81eaa0SLois Curfman McInnes 
189179bdfe76SSatish Balay    Input Parameters:
1892db81eaa0SLois Curfman McInnes +  comm - MPI communicator
189379bdfe76SSatish Balay .  bs   - size of blockk
189479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
189592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
189692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
189792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
189892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
189992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1900be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1901be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
190279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
190379bdfe76SSatish Balay            submatrix  (same for all local rows)
19047fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
190592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1906db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
190792e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
190879bdfe76SSatish Balay            submatrix (same for all local rows).
19097fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
191092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
191192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
191279bdfe76SSatish Balay 
191379bdfe76SSatish Balay    Output Parameter:
191479bdfe76SSatish Balay .  A - the matrix
191579bdfe76SSatish Balay 
1916db81eaa0SLois Curfman McInnes    Options Database Keys:
1917db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1918db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1919db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1920494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1921494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19223ffaccefSLois Curfman McInnes 
1923b259b22eSLois Curfman McInnes    Notes:
192479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
192579bdfe76SSatish Balay    (possibly both).
192679bdfe76SSatish Balay 
1927be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1928be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1929be79a94dSBarry Smith 
193079bdfe76SSatish Balay    Storage Information:
193179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
193279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
193379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
193479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
193579bdfe76SSatish Balay 
193679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
193779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
193879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
193979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
194079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
194179bdfe76SSatish Balay 
194279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
194379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
194479bdfe76SSatish Balay 
1945db81eaa0SLois Curfman McInnes .vb
1946db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1947db81eaa0SLois Curfman McInnes           -------------------
1948db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1949db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1950db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1951db81eaa0SLois Curfman McInnes           -------------------
1952db81eaa0SLois Curfman McInnes .ve
195379bdfe76SSatish Balay 
195479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
195579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
195679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
195757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
195879bdfe76SSatish Balay 
1959d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1960d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
196179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
196292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
196392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19646da5968aSLois Curfman McInnes    matrices.
196579bdfe76SSatish Balay 
1966027ccd11SLois Curfman McInnes    Level: intermediate
1967027ccd11SLois Curfman McInnes 
196892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
196979bdfe76SSatish Balay 
1970db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
197179bdfe76SSatish Balay @*/
197279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
197379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
197479bdfe76SSatish Balay {
197579bdfe76SSatish Balay   Mat          B;
197679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1977*f1af5d2fSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
1978*f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
197979bdfe76SSatish Balay 
1980d64ed03dSBarry Smith   PetscFunctionBegin;
1981962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1982962fb4a0SBarry Smith 
1983a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
198436c4a09eSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
198536c4a09eSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
19864fdb0a08SBarry Smith   if (d_nnz) {
198736c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
19884fdb0a08SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
19894fdb0a08SBarry Smith     }
19904fdb0a08SBarry Smith   }
19914fdb0a08SBarry Smith   if (o_nnz) {
199236c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
19934fdb0a08SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
19944fdb0a08SBarry Smith     }
19954fdb0a08SBarry Smith   }
19963914022bSBarry Smith 
1997d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1998494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
1999494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2000494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20013914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20023914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20033914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
20043a40ed3dSBarry Smith     PetscFunctionReturn(0);
20053914022bSBarry Smith   }
20063914022bSBarry Smith 
200779bdfe76SSatish Balay   *A = 0;
20083f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
200979bdfe76SSatish Balay   PLogObjectCreate(B);
201079bdfe76SSatish Balay   B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2011549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2012549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
20134c50302cSBarry Smith 
2014e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2015e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
201690f02eecSBarry Smith   B->mapping    = 0;
201779bdfe76SSatish Balay   B->factor     = 0;
201879bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
201979bdfe76SSatish Balay 
2020e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2021d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2022d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
202379bdfe76SSatish Balay 
2024d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2025a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2026d64ed03dSBarry Smith   }
2027a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2028a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2029a8c6a408SBarry Smith   }
2030a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2031a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2032a8c6a408SBarry Smith   }
2033cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2034cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
203579bdfe76SSatish Balay 
203679bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
203779bdfe76SSatish Balay     work[0] = m; work[1] = n;
203879bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2039ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
204079bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
204179bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
204279bdfe76SSatish Balay   }
204379bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
204479bdfe76SSatish Balay     Mbs = M/bs;
2045a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
204679bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
204779bdfe76SSatish Balay     m   = mbs*bs;
204879bdfe76SSatish Balay   }
204979bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
205079bdfe76SSatish Balay     Nbs = N/bs;
2051a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
205279bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
205379bdfe76SSatish Balay     n   = nbs*bs;
205479bdfe76SSatish Balay   }
2055a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2056a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2057a8c6a408SBarry Smith   }
205879bdfe76SSatish Balay 
205979bdfe76SSatish Balay   b->m = m; B->m = m;
206079bdfe76SSatish Balay   b->n = n; B->n = n;
206179bdfe76SSatish Balay   b->N = N; B->N = N;
206279bdfe76SSatish Balay   b->M = M; B->M = M;
206379bdfe76SSatish Balay   b->bs  = bs;
206479bdfe76SSatish Balay   b->bs2 = bs*bs;
206579bdfe76SSatish Balay   b->mbs = mbs;
206679bdfe76SSatish Balay   b->nbs = nbs;
206779bdfe76SSatish Balay   b->Mbs = Mbs;
206879bdfe76SSatish Balay   b->Nbs = Nbs;
206979bdfe76SSatish Balay 
2070c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2071c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2072488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2073488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2074c7fcc2eaSBarry Smith 
207579bdfe76SSatish Balay   /* build local table of row and column ownerships */
2076ff2fd236SBarry Smith   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2077ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20780ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2079ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2080ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
208179bdfe76SSatish Balay   b->rowners[0]    = 0;
208279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
208379bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
208479bdfe76SSatish Balay   }
2085ff2fd236SBarry Smith   for ( i=0; i<=b->size; i++ ) {
2086ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2087ff2fd236SBarry Smith   }
208879bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
208979bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
20904fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
20914fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
20924fa0d573SSatish Balay 
2093ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
209479bdfe76SSatish Balay   b->cowners[0] = 0;
209579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
209679bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
209779bdfe76SSatish Balay   }
209879bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
209979bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21004fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21014fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
210279bdfe76SSatish Balay 
2103a07cd24cSSatish Balay 
210479bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2105029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
210679bdfe76SSatish Balay   PLogObjectParent(B,b->A);
210779bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2108029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
210979bdfe76SSatish Balay   PLogObjectParent(B,b->B);
211079bdfe76SSatish Balay 
211179bdfe76SSatish Balay   /* build cache for off array entries formed */
21128798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
21138798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
211490f02eecSBarry Smith   b->donotstash  = 0;
211579bdfe76SSatish Balay   b->colmap      = 0;
211679bdfe76SSatish Balay   b->garray      = 0;
211779bdfe76SSatish Balay   b->roworiented = 1;
211879bdfe76SSatish Balay 
211930793edcSSatish Balay   /* stuff used in block assembly */
212030793edcSSatish Balay   b->barray       = 0;
212130793edcSSatish Balay 
212279bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
212379bdfe76SSatish Balay   b->lvec         = 0;
212479bdfe76SSatish Balay   b->Mvctx        = 0;
212579bdfe76SSatish Balay 
212679bdfe76SSatish Balay   /* stuff for MatGetRow() */
212779bdfe76SSatish Balay   b->rowindices   = 0;
212879bdfe76SSatish Balay   b->rowvalues    = 0;
212979bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
213079bdfe76SSatish Balay 
2131a07cd24cSSatish Balay   /* hash table stuff */
2132a07cd24cSSatish Balay   b->ht           = 0;
2133187ce0cbSSatish Balay   b->hd           = 0;
21340bdbc534SSatish Balay   b->ht_size      = 0;
2135133cdb44SSatish Balay   b->ht_flag      = 0;
213625fdafccSSatish Balay   b->ht_fact      = 0;
2137187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2138187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2139a07cd24cSSatish Balay 
214079bdfe76SSatish Balay   *A = B;
2141133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2142133cdb44SSatish Balay   if (flg) {
2143133cdb44SSatish Balay     double fact = 1.39;
2144133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2145*f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2146133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2147133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2148133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2149133cdb44SSatish Balay   }
2150*f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
21517fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
21527fc3c18eSBarry Smith                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2153*f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
21547fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
21557fc3c18eSBarry Smith                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2156*f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
21575ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
21585ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
21593a40ed3dSBarry Smith   PetscFunctionReturn(0);
216079bdfe76SSatish Balay }
2161026e39d0SSatish Balay 
21625615d1e5SSatish Balay #undef __FUNC__
21632e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
21642e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21650ac07820SSatish Balay {
21660ac07820SSatish Balay   Mat         mat;
21670ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2168*f1af5d2fSBarry Smith   int         ierr, len=0;
2169*f1af5d2fSBarry Smith   PetscTruth  flg;
21700ac07820SSatish Balay 
2171d64ed03dSBarry Smith   PetscFunctionBegin;
21720ac07820SSatish Balay   *newmat       = 0;
21733f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21740ac07820SSatish Balay   PLogObjectCreate(mat);
21750ac07820SSatish Balay   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2176549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2177e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIBAIJ;
2178e1311b90SBarry Smith   mat->ops->view    = MatView_MPIBAIJ;
21790ac07820SSatish Balay   mat->factor       = matin->factor;
21800ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2181aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
21820ac07820SSatish Balay 
21830ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21840ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21850ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21860ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21870ac07820SSatish Balay 
21880ac07820SSatish Balay   a->bs  = oldmat->bs;
21890ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21900ac07820SSatish Balay   a->mbs = oldmat->mbs;
21910ac07820SSatish Balay   a->nbs = oldmat->nbs;
21920ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21930ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21940ac07820SSatish Balay 
21950ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21960ac07820SSatish Balay   a->rend         = oldmat->rend;
21970ac07820SSatish Balay   a->cstart       = oldmat->cstart;
21980ac07820SSatish Balay   a->cend         = oldmat->cend;
21990ac07820SSatish Balay   a->size         = oldmat->size;
22000ac07820SSatish Balay   a->rank         = oldmat->rank;
2201aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2202aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2203aef5e8e0SSatish Balay   a->rowindices   = 0;
22040ac07820SSatish Balay   a->rowvalues    = 0;
22050ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
220630793edcSSatish Balay   a->barray       = 0;
22073123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
22083123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
22093123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
22103123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
22110ac07820SSatish Balay 
2212133cdb44SSatish Balay   /* hash table stuff */
2213133cdb44SSatish Balay   a->ht           = 0;
2214133cdb44SSatish Balay   a->hd           = 0;
2215133cdb44SSatish Balay   a->ht_size      = 0;
2216133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
221725fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2218133cdb44SSatish Balay   a->ht_total_ct  = 0;
2219133cdb44SSatish Balay   a->ht_insert_ct = 0;
2220133cdb44SSatish Balay 
2221133cdb44SSatish Balay 
2222ff2fd236SBarry Smith   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2223ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22240ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2225ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2226549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
22278798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
22288798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
22290ac07820SSatish Balay   if (oldmat->colmap) {
2230aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
22310f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
223248e59246SSatish Balay #else
22330ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22340ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2235549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
223648e59246SSatish Balay #endif
22370ac07820SSatish Balay   } else a->colmap = 0;
22380ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
22390ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
22400ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2241549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
22420ac07820SSatish Balay   } else a->garray = 0;
22430ac07820SSatish Balay 
22440ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
22450ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22460ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2247e18c124aSSatish Balay 
22480ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22492e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
22500ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22512e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
22520ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22530ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2254e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
22550ac07820SSatish Balay   if (flg) {
22560ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
22570ac07820SSatish Balay   }
22580ac07820SSatish Balay   *newmat = mat;
22593a40ed3dSBarry Smith   PetscFunctionReturn(0);
22600ac07820SSatish Balay }
226157b952d6SSatish Balay 
226257b952d6SSatish Balay #include "sys.h"
226357b952d6SSatish Balay 
22645615d1e5SSatish Balay #undef __FUNC__
22655615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
226657b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
226757b952d6SSatish Balay {
226857b952d6SSatish Balay   Mat          A;
226957b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
227057b952d6SSatish Balay   Scalar       *vals,*buf;
227157b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
227257b952d6SSatish Balay   MPI_Status   status;
2273cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
227457b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2275*f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
227657b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
227757b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
227857b952d6SSatish Balay 
2279d64ed03dSBarry Smith   PetscFunctionBegin;
2280*f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
228157b952d6SSatish Balay 
2282d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2283d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
228457b952d6SSatish Balay   if (!rank) {
228557b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2286e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2287a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2288d64ed03dSBarry Smith     if (header[3] < 0) {
2289a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2290d64ed03dSBarry Smith     }
22916c5fab8fSBarry Smith   }
2292d64ed03dSBarry Smith 
2293ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
229457b952d6SSatish Balay   M = header[1]; N = header[2];
229557b952d6SSatish Balay 
2296a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
229757b952d6SSatish Balay 
229857b952d6SSatish Balay   /*
229957b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
230057b952d6SSatish Balay      divisible by the blocksize
230157b952d6SSatish Balay   */
230257b952d6SSatish Balay   Mbs        = M/bs;
230357b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
230457b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
230557b952d6SSatish Balay   else                  Mbs++;
230657b952d6SSatish Balay   if (extra_rows &&!rank) {
2307b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
230857b952d6SSatish Balay   }
2309537820f0SBarry Smith 
231057b952d6SSatish Balay   /* determine ownership of all rows */
231157b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
231257b952d6SSatish Balay   m   = mbs * bs;
2313cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2314cee3aa6bSSatish Balay   browners = rowners + size + 1;
2315ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
231657b952d6SSatish Balay   rowners[0] = 0;
2317cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2318cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
231957b952d6SSatish Balay   rstart = rowners[rank];
232057b952d6SSatish Balay   rend   = rowners[rank+1];
232157b952d6SSatish Balay 
232257b952d6SSatish Balay   /* distribute row lengths to all processors */
232357b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
232457b952d6SSatish Balay   if (!rank) {
232557b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2326e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
232757b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
232857b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2329cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2330ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2331606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2332d64ed03dSBarry Smith   } else {
2333ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
233457b952d6SSatish Balay   }
233557b952d6SSatish Balay 
233657b952d6SSatish Balay   if (!rank) {
233757b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
233857b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2339549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
234057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
234157b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
234257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
234357b952d6SSatish Balay       }
234457b952d6SSatish Balay     }
2345606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
234657b952d6SSatish Balay 
234757b952d6SSatish Balay     /* determine max buffer needed and allocate it */
234857b952d6SSatish Balay     maxnz = 0;
234957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
235057b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
235157b952d6SSatish Balay     }
235257b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
235357b952d6SSatish Balay 
235457b952d6SSatish Balay     /* read in my part of the matrix column indices  */
235557b952d6SSatish Balay     nz = procsnz[0];
235657b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
235757b952d6SSatish Balay     mycols = ibuf;
2358cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2359e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2360cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2361cee3aa6bSSatish Balay 
236257b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
236357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
236457b952d6SSatish Balay       nz   = procsnz[i];
2365e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2366ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
236757b952d6SSatish Balay     }
236857b952d6SSatish Balay     /* read in the stuff for the last proc */
236957b952d6SSatish Balay     if ( size != 1 ) {
237057b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2371e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
237257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2373ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
237457b952d6SSatish Balay     }
2375606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2376d64ed03dSBarry Smith   } else {
237757b952d6SSatish Balay     /* determine buffer space needed for message */
237857b952d6SSatish Balay     nz = 0;
237957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
238057b952d6SSatish Balay       nz += locrowlens[i];
238157b952d6SSatish Balay     }
238257b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
238357b952d6SSatish Balay     mycols = ibuf;
238457b952d6SSatish Balay     /* receive message of column indices*/
2385ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2386ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2387a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
238857b952d6SSatish Balay   }
238957b952d6SSatish Balay 
239057b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2391cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2392cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
239357b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2394549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
239557b952d6SSatish Balay   masked1 = mask    + Mbs;
239657b952d6SSatish Balay   masked2 = masked1 + Mbs;
239757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
239857b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
239957b952d6SSatish Balay     dcount  = 0;
240057b952d6SSatish Balay     odcount = 0;
240157b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
240257b952d6SSatish Balay       kmax = locrowlens[rowcount];
240357b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
240457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
240557b952d6SSatish Balay         if (!mask[tmp]) {
240657b952d6SSatish Balay           mask[tmp] = 1;
240757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
240857b952d6SSatish Balay           else masked1[dcount++] = tmp;
240957b952d6SSatish Balay         }
241057b952d6SSatish Balay       }
241157b952d6SSatish Balay       rowcount++;
241257b952d6SSatish Balay     }
2413cee3aa6bSSatish Balay 
241457b952d6SSatish Balay     dlens[i]  = dcount;
241557b952d6SSatish Balay     odlens[i] = odcount;
2416cee3aa6bSSatish Balay 
241757b952d6SSatish Balay     /* zero out the mask elements we set */
241857b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
241957b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
242057b952d6SSatish Balay   }
2421cee3aa6bSSatish Balay 
242257b952d6SSatish Balay   /* create our matrix */
2423549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
242457b952d6SSatish Balay   A = *newmat;
24256d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
242657b952d6SSatish Balay 
242757b952d6SSatish Balay   if (!rank) {
242857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
242957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
243057b952d6SSatish Balay     nz = procsnz[0];
243157b952d6SSatish Balay     vals = buf;
2432cee3aa6bSSatish Balay     mycols = ibuf;
2433cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2434e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2435cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2436537820f0SBarry Smith 
243757b952d6SSatish Balay     /* insert into matrix */
243857b952d6SSatish Balay     jj      = rstart*bs;
243957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
244057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
244157b952d6SSatish Balay       mycols += locrowlens[i];
244257b952d6SSatish Balay       vals   += locrowlens[i];
244357b952d6SSatish Balay       jj++;
244457b952d6SSatish Balay     }
244557b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
244657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
244757b952d6SSatish Balay       nz   = procsnz[i];
244857b952d6SSatish Balay       vals = buf;
2449e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2450ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
245157b952d6SSatish Balay     }
245257b952d6SSatish Balay     /* the last proc */
245357b952d6SSatish Balay     if ( size != 1 ){
245457b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2455cee3aa6bSSatish Balay       vals = buf;
2456e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
245757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2458ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
245957b952d6SSatish Balay     }
2460606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2461d64ed03dSBarry Smith   } else {
246257b952d6SSatish Balay     /* receive numeric values */
246357b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
246457b952d6SSatish Balay 
246557b952d6SSatish Balay     /* receive message of values*/
246657b952d6SSatish Balay     vals   = buf;
2467cee3aa6bSSatish Balay     mycols = ibuf;
2468ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2469ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2470a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
247157b952d6SSatish Balay 
247257b952d6SSatish Balay     /* insert into matrix */
247357b952d6SSatish Balay     jj      = rstart*bs;
2474cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
247557b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
247657b952d6SSatish Balay       mycols += locrowlens[i];
247757b952d6SSatish Balay       vals   += locrowlens[i];
247857b952d6SSatish Balay       jj++;
247957b952d6SSatish Balay     }
248057b952d6SSatish Balay   }
2481606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2482606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2483606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2484606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2485606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2486606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
24876d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24886d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24893a40ed3dSBarry Smith   PetscFunctionReturn(0);
249057b952d6SSatish Balay }
249157b952d6SSatish Balay 
2492133cdb44SSatish Balay #undef __FUNC__
2493133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2494133cdb44SSatish Balay /*@
2495133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2496133cdb44SSatish Balay 
2497133cdb44SSatish Balay    Input Parameters:
2498133cdb44SSatish Balay .  mat  - the matrix
2499133cdb44SSatish Balay .  fact - factor
2500133cdb44SSatish Balay 
2501fee21e36SBarry Smith    Collective on Mat
2502fee21e36SBarry Smith 
25038c890885SBarry Smith    Level: advanced
25048c890885SBarry Smith 
2505133cdb44SSatish Balay   Notes:
2506133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2507133cdb44SSatish Balay 
2508133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2509133cdb44SSatish Balay 
2510133cdb44SSatish Balay .seealso: MatSetOption()
2511133cdb44SSatish Balay @*/
2512133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2513133cdb44SSatish Balay {
251425fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2515133cdb44SSatish Balay 
2516133cdb44SSatish Balay   PetscFunctionBegin;
2517133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
251825fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2519133cdb44SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2520133cdb44SSatish Balay   }
2521133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2522133cdb44SSatish Balay   baij->ht_fact = fact;
2523133cdb44SSatish Balay   PetscFunctionReturn(0);
2524133cdb44SSatish Balay }
2525