xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c38d4ed214221df9ea04de46f7761bef149d00ff)
1*c38d4ed2SBarry Smith /*$Id: mpibaij.c,v 1.183 1999/11/05 14:45:37 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
163b2fbd54SBarry Smith 
177fc3c18eSBarry Smith EXTERN_C_BEGIN
187fc3c18eSBarry Smith #undef __FUNC__
197fc3c18eSBarry Smith #define __FUNC__ "MatStoreValues_MPIBAIJ"
207fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
217fc3c18eSBarry Smith {
227fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
237fc3c18eSBarry Smith   int         ierr;
247fc3c18eSBarry Smith 
257fc3c18eSBarry Smith   PetscFunctionBegin;
267fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
277fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
287fc3c18eSBarry Smith   PetscFunctionReturn(0);
297fc3c18eSBarry Smith }
307fc3c18eSBarry Smith EXTERN_C_END
317fc3c18eSBarry Smith 
327fc3c18eSBarry Smith EXTERN_C_BEGIN
337fc3c18eSBarry Smith #undef __FUNC__
347fc3c18eSBarry Smith #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
357fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
367fc3c18eSBarry Smith {
377fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
387fc3c18eSBarry Smith   int         ierr;
397fc3c18eSBarry Smith 
407fc3c18eSBarry Smith   PetscFunctionBegin;
417fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
427fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
437fc3c18eSBarry Smith   PetscFunctionReturn(0);
447fc3c18eSBarry Smith }
457fc3c18eSBarry Smith EXTERN_C_END
467fc3c18eSBarry Smith 
47537820f0SBarry Smith /*
48537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
4957b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
5057b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
5157b952d6SSatish Balay    length of colmap equals the global matrix length.
5257b952d6SSatish Balay */
535615d1e5SSatish Balay #undef __FUNC__
545615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
5557b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
5657b952d6SSatish Balay {
5757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5857b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
59dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
6057b952d6SSatish Balay 
61d64ed03dSBarry Smith   PetscFunctionBegin;
62aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
630f5bd95cSBarry Smith   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
6448e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
650f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
6648e59246SSatish Balay   }
6748e59246SSatish Balay #else
68758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
6957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
70549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
71928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
7248e59246SSatish Balay #endif
733a40ed3dSBarry Smith   PetscFunctionReturn(0);
7457b952d6SSatish Balay }
7557b952d6SSatish Balay 
7680c1aa95SSatish Balay #define CHUNKSIZE  10
7780c1aa95SSatish Balay 
78f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7980c1aa95SSatish Balay { \
8080c1aa95SSatish Balay  \
8180c1aa95SSatish Balay     brow = row/bs;  \
8280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
8480c1aa95SSatish Balay       bcol = col/bs; \
8580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
86ab26458aSBarry Smith       low = 0; high = nrow; \
87ab26458aSBarry Smith       while (high-low > 3) { \
88ab26458aSBarry Smith         t = (low+high)/2; \
89ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
90ab26458aSBarry Smith         else              low  = t; \
91ab26458aSBarry Smith       } \
92ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
9380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
9480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
9580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
97eada6651SSatish Balay           else                    *bap  = value;  \
98ac7a638eSSatish Balay           goto a_noinsert; \
9980c1aa95SSatish Balay         } \
10080c1aa95SSatish Balay       } \
10189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
102a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
10380c1aa95SSatish Balay       if (nrow >= rmax) { \
10480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
10580c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10680c1aa95SSatish Balay         Scalar *new_a; \
10780c1aa95SSatish Balay  \
108a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
10989280ab3SLois Curfman McInnes  \
11080c1aa95SSatish Balay         /* malloc new storage space */ \
11180c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
11280c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
11380c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
11480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
11580c1aa95SSatish Balay  \
11680c1aa95SSatish Balay         /* copy over old data into new slots */ \
11780c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11880c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
12080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
122549d3d68SSatish Balay                           len*sizeof(int));CHKERRQ(ierr); \
123549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
124549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
125549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
126549d3d68SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
12780c1aa95SSatish Balay         /* free up old matrix storage */ \
128606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
129606d414cSSatish Balay         if (!a->singlemalloc) { \
130606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
131606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
132606d414cSSatish Balay         } \
13380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
13480c1aa95SSatish Balay         a->singlemalloc = 1; \
13580c1aa95SSatish Balay  \
13680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
137ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
13880c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
14080c1aa95SSatish Balay         a->reallocs++; \
14180c1aa95SSatish Balay         a->nz++; \
14280c1aa95SSatish Balay       } \
14380c1aa95SSatish Balay       N = nrow++ - 1;  \
14480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
14580c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
14680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
147549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
14880c1aa95SSatish Balay       } \
149549d3d68SSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
15080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
15180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
152ac7a638eSSatish Balay       a_noinsert:; \
15380c1aa95SSatish Balay     ailen[brow] = nrow; \
15480c1aa95SSatish Balay }
15557b952d6SSatish Balay 
156ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
157ac7a638eSSatish Balay { \
158ac7a638eSSatish Balay  \
159ac7a638eSSatish Balay     brow = row/bs;  \
160ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
161ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
162ac7a638eSSatish Balay       bcol = col/bs; \
163ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
164ac7a638eSSatish Balay       low = 0; high = nrow; \
165ac7a638eSSatish Balay       while (high-low > 3) { \
166ac7a638eSSatish Balay         t = (low+high)/2; \
167ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
168ac7a638eSSatish Balay         else              low  = t; \
169ac7a638eSSatish Balay       } \
170ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
171ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
172ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
173ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
174ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
175ac7a638eSSatish Balay           else                    *bap  = value;  \
176ac7a638eSSatish Balay           goto b_noinsert; \
177ac7a638eSSatish Balay         } \
178ac7a638eSSatish Balay       } \
17989280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
180a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
181ac7a638eSSatish Balay       if (nrow >= rmax) { \
182ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
18374c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
184ac7a638eSSatish Balay         Scalar *new_a; \
185ac7a638eSSatish Balay  \
186a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
18789280ab3SLois Curfman McInnes  \
188ac7a638eSSatish Balay         /* malloc new storage space */ \
18974c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
190ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
191ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
192ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
193ac7a638eSSatish Balay  \
194ac7a638eSSatish Balay         /* copy over old data into new slots */ \
195ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
19674c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
197549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
198ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
199549d3d68SSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
200549d3d68SSatish Balay                            len*sizeof(int));CHKERRQ(ierr); \
201549d3d68SSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
202549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
203549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
204549d3d68SSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
205ac7a638eSSatish Balay         /* free up old matrix storage */ \
206606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
207606d414cSSatish Balay         if (!b->singlemalloc) { \
208606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
209606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
210606d414cSSatish Balay         } \
21174c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
21274c639caSSatish Balay         b->singlemalloc = 1; \
213ac7a638eSSatish Balay  \
214ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
215ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
21674c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
21774c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
21874c639caSSatish Balay         b->reallocs++; \
21974c639caSSatish Balay         b->nz++; \
220ac7a638eSSatish Balay       } \
221ac7a638eSSatish Balay       N = nrow++ - 1;  \
222ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
223ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
224ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
225549d3d68SSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
226ac7a638eSSatish Balay       } \
227549d3d68SSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
228ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
229ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
230ac7a638eSSatish Balay       b_noinsert:; \
231ac7a638eSSatish Balay     bilen[brow] = nrow; \
232ac7a638eSSatish Balay }
233ac7a638eSSatish Balay 
2345615d1e5SSatish Balay #undef __FUNC__
2355615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
236ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
23757b952d6SSatish Balay {
23857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
23957b952d6SSatish Balay   Scalar      value;
2404fa0d573SSatish Balay   int         ierr,i,j,row,col;
2414fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2424fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2434fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
24457b952d6SSatish Balay 
245eada6651SSatish Balay   /* Some Variables required in the macro */
24680c1aa95SSatish Balay   Mat         A = baij->A;
24780c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
248ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
249ac7a638eSSatish Balay   Scalar      *aa=a->a;
250ac7a638eSSatish Balay 
251ac7a638eSSatish Balay   Mat         B = baij->B;
252ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
253ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
254ac7a638eSSatish Balay   Scalar      *ba=b->a;
255ac7a638eSSatish Balay 
256ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
257ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
258ac7a638eSSatish Balay   Scalar      *ap,*bap;
25980c1aa95SSatish Balay 
260d64ed03dSBarry Smith   PetscFunctionBegin;
26157b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2625ef9f2a5SBarry Smith     if (im[i] < 0) continue;
263aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
264a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
26757b952d6SSatish Balay       row = im[i] - rstart_orig;
26857b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
26957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
27057b952d6SSatish Balay           col = in[j] - cstart_orig;
27157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
272f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
27380c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27473959e64SBarry Smith         } else if (in[j] < 0) continue;
275aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
276a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
277639f9d9dSBarry Smith #endif
27857b952d6SSatish Balay         else {
27957b952d6SSatish Balay           if (mat->was_assembled) {
280905e6a2fSBarry Smith             if (!baij->colmap) {
281905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
282905e6a2fSBarry Smith             }
283aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2840f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
285fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
28648e59246SSatish Balay #else
287905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
28848e59246SSatish Balay #endif
28957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
29057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
29157b952d6SSatish Balay               col =  in[j];
2929bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2939bf004c3SSatish Balay               B = baij->B;
2949bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2959bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2969bf004c3SSatish Balay               ba=b->a;
29757b952d6SSatish Balay             }
298d64ed03dSBarry Smith           } else col = in[j];
29957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
300ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
301ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
30257b952d6SSatish Balay         }
30357b952d6SSatish Balay       }
304d64ed03dSBarry Smith     } else {
30590f02eecSBarry Smith       if ( !baij->donotstash) {
306ff2fd236SBarry Smith         if (roworiented) {
3078798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
308ff2fd236SBarry Smith         } else {
3098798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
31057b952d6SSatish Balay         }
31157b952d6SSatish Balay       }
31257b952d6SSatish Balay     }
31390f02eecSBarry Smith   }
3143a40ed3dSBarry Smith   PetscFunctionReturn(0);
31557b952d6SSatish Balay }
31657b952d6SSatish Balay 
317ab26458aSBarry Smith #undef __FUNC__
318ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
319ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
320ab26458aSBarry Smith {
321ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
32230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
3237ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
324ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
325ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
326ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
327ab26458aSBarry Smith 
328b16ae2b1SBarry Smith   PetscFunctionBegin;
32930793edcSSatish Balay   if(!barray) {
33047513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
33130793edcSSatish Balay   }
33230793edcSSatish Balay 
333ab26458aSBarry Smith   if (roworiented) {
334ab26458aSBarry Smith     stepval = (n-1)*bs;
335ab26458aSBarry Smith   } else {
336ab26458aSBarry Smith     stepval = (m-1)*bs;
337ab26458aSBarry Smith   }
338ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3395ef9f2a5SBarry Smith     if (im[i] < 0) continue;
340aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3415ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
342ab26458aSBarry Smith #endif
343ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
344ab26458aSBarry Smith       row = im[i] - rstart;
345ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
34615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
34715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
34815b57d14SSatish Balay           barray = v + i*bs2;
34915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
35015b57d14SSatish Balay           barray = v + j*bs2;
35115b57d14SSatish Balay         } else { /* Here a copy is required */
352ab26458aSBarry Smith           if (roworiented) {
353ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
354ab26458aSBarry Smith           } else {
355ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
356abef11f7SSatish Balay           }
35747513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
35847513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
35930793edcSSatish Balay               *barray++  = *value++;
36047513183SBarry Smith             }
36147513183SBarry Smith           }
36230793edcSSatish Balay           barray -=bs2;
36315b57d14SSatish Balay         }
364abef11f7SSatish Balay 
365abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
366abef11f7SSatish Balay           col  = in[j] - cstart;
36730793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
368ab26458aSBarry Smith         }
3695ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
370aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3715ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
372ab26458aSBarry Smith #endif
373ab26458aSBarry Smith         else {
374ab26458aSBarry Smith           if (mat->was_assembled) {
375ab26458aSBarry Smith             if (!baij->colmap) {
376ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
377ab26458aSBarry Smith             }
378a5eb4965SSatish Balay 
379aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
380aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
381fa46199cSSatish Balay             { int data;
3820f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3830f5bd95cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
384fa46199cSSatish Balay             }
38548e59246SSatish Balay #else
3860f5bd95cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
387a5eb4965SSatish Balay #endif
38848e59246SSatish Balay #endif
389aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3900f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
391fa46199cSSatish Balay             col  = (col - 1)/bs;
39248e59246SSatish Balay #else
393a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
39448e59246SSatish Balay #endif
395ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
396ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
397ab26458aSBarry Smith               col =  in[j];
398ab26458aSBarry Smith             }
399ab26458aSBarry Smith           }
400ab26458aSBarry Smith           else col = in[j];
40130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
402ab26458aSBarry Smith         }
403ab26458aSBarry Smith       }
404d64ed03dSBarry Smith     } else {
405ab26458aSBarry Smith       if (!baij->donotstash) {
406ff2fd236SBarry Smith         if (roworiented) {
4078798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
408ff2fd236SBarry Smith         } else {
4098798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410ff2fd236SBarry Smith         }
411abef11f7SSatish Balay       }
412ab26458aSBarry Smith     }
413ab26458aSBarry Smith   }
4143a40ed3dSBarry Smith   PetscFunctionReturn(0);
415ab26458aSBarry Smith }
4160bdbc534SSatish Balay #define HASH_KEY 0.6180339887
417c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
418c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
419c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4205615d1e5SSatish Balay #undef __FUNC__
4210bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
4220bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4230bdbc534SSatish Balay {
4240bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4250bdbc534SSatish Balay   int         ierr,i,j,row,col;
4260bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
427c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
428c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
429c2760754SSatish Balay   double      tmp;
430b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
431aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4324a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4334a15367fSSatish Balay #endif
4340bdbc534SSatish Balay 
4350bdbc534SSatish Balay   PetscFunctionBegin;
4360bdbc534SSatish Balay 
4370bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
438aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4390bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4400bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4410bdbc534SSatish Balay #endif
4420bdbc534SSatish Balay       row = im[i];
443c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4440bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4450bdbc534SSatish Balay         col = in[j];
4460bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4470bdbc534SSatish Balay         /* Look up into the Hash Table */
448c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
449c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4500bdbc534SSatish Balay 
451c2760754SSatish Balay 
452c2760754SSatish Balay         idx = h1;
453aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
454187ce0cbSSatish Balay         insert_ct++;
455187ce0cbSSatish Balay         total_ct++;
456187ce0cbSSatish Balay         if (HT[idx] != key) {
457187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
458187ce0cbSSatish Balay           if (idx == size) {
459187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
460187ce0cbSSatish Balay             if (idx == h1) {
461187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
462187ce0cbSSatish Balay             }
463187ce0cbSSatish Balay           }
464187ce0cbSSatish Balay         }
465187ce0cbSSatish Balay #else
466c2760754SSatish Balay         if (HT[idx] != key) {
467c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
468c2760754SSatish Balay           if (idx == size) {
469c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
470c2760754SSatish Balay             if (idx == h1) {
471c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
472c2760754SSatish Balay             }
473c2760754SSatish Balay           }
474c2760754SSatish Balay         }
475187ce0cbSSatish Balay #endif
476c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
477c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
478c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4790bdbc534SSatish Balay       }
4800bdbc534SSatish Balay     } else {
4810bdbc534SSatish Balay       if (!baij->donotstash) {
482ff2fd236SBarry Smith         if (roworiented) {
4838798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
484ff2fd236SBarry Smith         } else {
4858798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4860bdbc534SSatish Balay         }
4870bdbc534SSatish Balay       }
4880bdbc534SSatish Balay     }
4890bdbc534SSatish Balay   }
490aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
491187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
492187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
493187ce0cbSSatish Balay #endif
4940bdbc534SSatish Balay   PetscFunctionReturn(0);
4950bdbc534SSatish Balay }
4960bdbc534SSatish Balay 
4970bdbc534SSatish Balay #undef __FUNC__
4980bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4990bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5000bdbc534SSatish Balay {
5010bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
5028798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
5030bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
504b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
505c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
506c2760754SSatish Balay   double      tmp;
507187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
508aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5094a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5104a15367fSSatish Balay #endif
5110bdbc534SSatish Balay 
512d0a41580SSatish Balay   PetscFunctionBegin;
513d0a41580SSatish Balay 
5140bdbc534SSatish Balay   if (roworiented) {
5150bdbc534SSatish Balay     stepval = (n-1)*bs;
5160bdbc534SSatish Balay   } else {
5170bdbc534SSatish Balay     stepval = (m-1)*bs;
5180bdbc534SSatish Balay   }
5190bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
520aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5210bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5220bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5230bdbc534SSatish Balay #endif
5240bdbc534SSatish Balay     row   = im[i];
525187ce0cbSSatish Balay     v_t   = v + i*bs2;
526c2760754SSatish Balay     if (row >= rstart && row < rend) {
5270bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
5280bdbc534SSatish Balay         col = in[j];
5290bdbc534SSatish Balay 
5300bdbc534SSatish Balay         /* Look up into the Hash Table */
531c2760754SSatish Balay         key = row*Nbs+col+1;
532c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5330bdbc534SSatish Balay 
534c2760754SSatish Balay         idx = h1;
535aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
536187ce0cbSSatish Balay         total_ct++;
537187ce0cbSSatish Balay         insert_ct++;
538187ce0cbSSatish Balay        if (HT[idx] != key) {
539187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
540187ce0cbSSatish Balay           if (idx == size) {
541187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
542187ce0cbSSatish Balay             if (idx == h1) {
543187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
544187ce0cbSSatish Balay             }
545187ce0cbSSatish Balay           }
546187ce0cbSSatish Balay         }
547187ce0cbSSatish Balay #else
548c2760754SSatish Balay         if (HT[idx] != key) {
549c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
550c2760754SSatish Balay           if (idx == size) {
551c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
552c2760754SSatish Balay             if (idx == h1) {
553c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
554c2760754SSatish Balay             }
555c2760754SSatish Balay           }
556c2760754SSatish Balay         }
557187ce0cbSSatish Balay #endif
558c2760754SSatish Balay         baij_a = HD[idx];
5590bdbc534SSatish Balay         if (roworiented) {
560c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
561187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
562187ce0cbSSatish Balay           value = v_t;
563187ce0cbSSatish Balay           v_t  += bs;
564fef45726SSatish Balay           if (addv == ADD_VALUES) {
565c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
566c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
567fef45726SSatish Balay                 baij_a[jj]  += *value++;
568b4cc0f5aSSatish Balay               }
569b4cc0f5aSSatish Balay             }
570fef45726SSatish Balay           } else {
571c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
572c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
573fef45726SSatish Balay                 baij_a[jj]  = *value++;
574fef45726SSatish Balay               }
575fef45726SSatish Balay             }
576fef45726SSatish Balay           }
5770bdbc534SSatish Balay         } else {
5780bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
579fef45726SSatish Balay           if (addv == ADD_VALUES) {
580b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5810bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
582fef45726SSatish Balay                 baij_a[jj]  += *value++;
583fef45726SSatish Balay               }
584fef45726SSatish Balay             }
585fef45726SSatish Balay           } else {
586fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
587fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
588fef45726SSatish Balay                 baij_a[jj]  = *value++;
589fef45726SSatish Balay               }
590b4cc0f5aSSatish Balay             }
5910bdbc534SSatish Balay           }
5920bdbc534SSatish Balay         }
5930bdbc534SSatish Balay       }
5940bdbc534SSatish Balay     } else {
5950bdbc534SSatish Balay       if (!baij->donotstash) {
5960bdbc534SSatish Balay         if (roworiented) {
5978798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
5980bdbc534SSatish Balay         } else {
5998798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6000bdbc534SSatish Balay         }
6010bdbc534SSatish Balay       }
6020bdbc534SSatish Balay     }
6030bdbc534SSatish Balay   }
604aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
605187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
606187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
607187ce0cbSSatish Balay #endif
6080bdbc534SSatish Balay   PetscFunctionReturn(0);
6090bdbc534SSatish Balay }
610133cdb44SSatish Balay 
6110bdbc534SSatish Balay #undef __FUNC__
6125615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
613ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
614d6de1c52SSatish Balay {
615d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
616d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
61748e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
618d6de1c52SSatish Balay 
619133cdb44SSatish Balay   PetscFunctionBegin;
620d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
621a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
622a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
623d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
624d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
625d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
626a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
627a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
628d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
629d6de1c52SSatish Balay           col = idxn[j] - bscstart;
63098dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631d64ed03dSBarry Smith         } else {
632905e6a2fSBarry Smith           if (!baij->colmap) {
633905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
634905e6a2fSBarry Smith           }
635aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
6360f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
637fa46199cSSatish Balay           data --;
63848e59246SSatish Balay #else
63948e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64048e59246SSatish Balay #endif
64148e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
642d9d09a02SSatish Balay           else {
64348e59246SSatish Balay             col  = data + idxn[j]%bs;
64498dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
645d6de1c52SSatish Balay           }
646d6de1c52SSatish Balay         }
647d6de1c52SSatish Balay       }
648d64ed03dSBarry Smith     } else {
649a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
650d6de1c52SSatish Balay     }
651d6de1c52SSatish Balay   }
6523a40ed3dSBarry Smith  PetscFunctionReturn(0);
653d6de1c52SSatish Balay }
654d6de1c52SSatish Balay 
6555615d1e5SSatish Balay #undef __FUNC__
6565615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
657ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
658d6de1c52SSatish Balay {
659d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
660d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
661acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
662d6de1c52SSatish Balay   double     sum = 0.0;
663d6de1c52SSatish Balay   Scalar     *v;
664d6de1c52SSatish Balay 
665d64ed03dSBarry Smith   PetscFunctionBegin;
666d6de1c52SSatish Balay   if (baij->size == 1) {
667d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
668d6de1c52SSatish Balay   } else {
669d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
670d6de1c52SSatish Balay       v = amat->a;
671d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
672aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
673e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
674d6de1c52SSatish Balay #else
675d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
676d6de1c52SSatish Balay #endif
677d6de1c52SSatish Balay       }
678d6de1c52SSatish Balay       v = bmat->a;
679d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
680aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
681e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
682d6de1c52SSatish Balay #else
683d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
684d6de1c52SSatish Balay #endif
685d6de1c52SSatish Balay       }
686ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
687d6de1c52SSatish Balay       *norm = sqrt(*norm);
688d64ed03dSBarry Smith     } else {
689e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
690d6de1c52SSatish Balay     }
691d64ed03dSBarry Smith   }
6923a40ed3dSBarry Smith   PetscFunctionReturn(0);
693d6de1c52SSatish Balay }
69457b952d6SSatish Balay 
695bd7f49f5SSatish Balay 
696fef45726SSatish Balay /*
697fef45726SSatish Balay   Creates the hash table, and sets the table
698fef45726SSatish Balay   This table is created only once.
699fef45726SSatish Balay   If new entried need to be added to the matrix
700fef45726SSatish Balay   then the hash table has to be destroyed and
701fef45726SSatish Balay   recreated.
702fef45726SSatish Balay */
703fef45726SSatish Balay #undef __FUNC__
704fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
705d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
706596b8d2eSBarry Smith {
707596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
708596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
709596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7100bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
711549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
712187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
713fef45726SSatish Balay   int         *HT,key;
7140bdbc534SSatish Balay   Scalar      **HD;
715c2760754SSatish Balay   double      tmp;
716aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
7174a15367fSSatish Balay   int         ct=0,max=0;
7184a15367fSSatish Balay #endif
719fef45726SSatish Balay 
720d64ed03dSBarry Smith   PetscFunctionBegin;
7210bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7220bdbc534SSatish Balay   size = baij->ht_size;
723fef45726SSatish Balay 
7240bdbc534SSatish Balay   if (baij->ht) {
7250bdbc534SSatish Balay     PetscFunctionReturn(0);
726596b8d2eSBarry Smith   }
7270bdbc534SSatish Balay 
728fef45726SSatish Balay   /* Allocate Memory for Hash Table */
729b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
730b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
731b9e4cc15SSatish Balay   HD = baij->hd;
732a07cd24cSSatish Balay   HT = baij->ht;
733b9e4cc15SSatish Balay 
734b9e4cc15SSatish Balay 
735549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
7360bdbc534SSatish Balay 
737596b8d2eSBarry Smith 
738596b8d2eSBarry Smith   /* Loop Over A */
7390bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
740596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7410bdbc534SSatish Balay       row = i+rstart;
7420bdbc534SSatish Balay       col = aj[j]+cstart;
743596b8d2eSBarry Smith 
744187ce0cbSSatish Balay       key = row*Nbs + col + 1;
745c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7460bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7470bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7480bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7490bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
750596b8d2eSBarry Smith           break;
751aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
752187ce0cbSSatish Balay         } else {
753187ce0cbSSatish Balay           ct++;
754187ce0cbSSatish Balay #endif
755596b8d2eSBarry Smith         }
756187ce0cbSSatish Balay       }
757aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
758187ce0cbSSatish Balay       if (k> max) max = k;
759187ce0cbSSatish Balay #endif
760596b8d2eSBarry Smith     }
761596b8d2eSBarry Smith   }
762596b8d2eSBarry Smith   /* Loop Over B */
7630bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
764596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7650bdbc534SSatish Balay       row = i+rstart;
7660bdbc534SSatish Balay       col = garray[bj[j]];
767187ce0cbSSatish Balay       key = row*Nbs + col + 1;
768c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7690bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7700bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7710bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7720bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
773596b8d2eSBarry Smith           break;
774aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
775187ce0cbSSatish Balay         } else {
776187ce0cbSSatish Balay           ct++;
777187ce0cbSSatish Balay #endif
778596b8d2eSBarry Smith         }
779187ce0cbSSatish Balay       }
780aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
781187ce0cbSSatish Balay       if (k> max) max = k;
782187ce0cbSSatish Balay #endif
783596b8d2eSBarry Smith     }
784596b8d2eSBarry Smith   }
785596b8d2eSBarry Smith 
786596b8d2eSBarry Smith   /* Print Summary */
787aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
788*c38d4ed2SBarry Smith   for ( i=0,j=0; i<size; i++) {
789596b8d2eSBarry Smith     if (HT[i]) {j++;}
790*c38d4ed2SBarry Smith   }
791*c38d4ed2SBarry Smith   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((double)(ct+j))/j,max);
792187ce0cbSSatish Balay #endif
7933a40ed3dSBarry Smith   PetscFunctionReturn(0);
794596b8d2eSBarry Smith }
79557b952d6SSatish Balay 
796bbb85fb3SSatish Balay #undef __FUNC__
797bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
798bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799bbb85fb3SSatish Balay {
800bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
802bbb85fb3SSatish Balay   InsertMode  addv;
803bbb85fb3SSatish Balay 
804bbb85fb3SSatish Balay   PetscFunctionBegin;
805bbb85fb3SSatish Balay   if (baij->donotstash) {
806bbb85fb3SSatish Balay     PetscFunctionReturn(0);
807bbb85fb3SSatish Balay   }
808bbb85fb3SSatish Balay 
809bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
810bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
811bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
812bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
813bbb85fb3SSatish Balay   }
814bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
815bbb85fb3SSatish Balay 
8168798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
8178798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
8188798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8195a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
8208798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8215a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822bbb85fb3SSatish Balay   PetscFunctionReturn(0);
823bbb85fb3SSatish Balay }
824bbb85fb3SSatish Balay 
825bbb85fb3SSatish Balay #undef __FUNC__
826bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
827bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
828bbb85fb3SSatish Balay {
829bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
830a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
831a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
832a2d1c673SSatish Balay   int         *row,*col,other_disassembled,r1,r2,r3;
833bbb85fb3SSatish Balay   Scalar      *val;
834bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
835bbb85fb3SSatish Balay 
836bbb85fb3SSatish Balay   PetscFunctionBegin;
837bbb85fb3SSatish Balay   if (!baij->donotstash) {
838a2d1c673SSatish Balay     while (1) {
8398798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
840a2d1c673SSatish Balay       if (!flg) break;
841a2d1c673SSatish Balay 
842bbb85fb3SSatish Balay       for ( i=0; i<n; ) {
843bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
844bbb85fb3SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
845bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
846bbb85fb3SSatish Balay         else       ncols = n-i;
847bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
848bbb85fb3SSatish Balay         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849bbb85fb3SSatish Balay         i = j;
850bbb85fb3SSatish Balay       }
851bbb85fb3SSatish Balay     }
8528798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
854a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
855a2d1c673SSatish Balay        restore the original flags */
856a2d1c673SSatish Balay     r1 = baij->roworiented;
857a2d1c673SSatish Balay     r2 = a->roworiented;
858a2d1c673SSatish Balay     r3 = b->roworiented;
859a2d1c673SSatish Balay     baij->roworiented = 0;
860a2d1c673SSatish Balay     a->roworiented    = 0;
861a2d1c673SSatish Balay     b->roworiented    = 0;
862a2d1c673SSatish Balay     while (1) {
8638798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
864a2d1c673SSatish Balay       if (!flg) break;
865a2d1c673SSatish Balay 
866a2d1c673SSatish Balay       for ( i=0; i<n; ) {
867a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
868a2d1c673SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
869a2d1c673SSatish Balay         if (j < n) ncols = j-i;
870a2d1c673SSatish Balay         else       ncols = n-i;
871a2d1c673SSatish Balay         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
872a2d1c673SSatish Balay         i = j;
873a2d1c673SSatish Balay       }
874a2d1c673SSatish Balay     }
8758798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
876a2d1c673SSatish Balay     baij->roworiented = r1;
877a2d1c673SSatish Balay     a->roworiented    = r2;
878a2d1c673SSatish Balay     b->roworiented    = r3;
879bbb85fb3SSatish Balay   }
880bbb85fb3SSatish Balay 
881bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
882bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
883bbb85fb3SSatish Balay 
884bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
885bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
886bbb85fb3SSatish Balay   /*
887bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
888bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
889bbb85fb3SSatish Balay   */
890bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
891bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
892bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
893bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
894bbb85fb3SSatish Balay     }
895bbb85fb3SSatish Balay   }
896bbb85fb3SSatish Balay 
897bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
898bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
899bbb85fb3SSatish Balay   }
900bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
901bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
902bbb85fb3SSatish Balay 
903aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
904bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
905*c38d4ed2SBarry Smith     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
906bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
907bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
908bbb85fb3SSatish Balay   }
909bbb85fb3SSatish Balay #endif
910bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
911bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
912bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
913bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
914bbb85fb3SSatish Balay   }
915bbb85fb3SSatish Balay 
916606d414cSSatish Balay   if (baij->rowvalues) {
917606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
918606d414cSSatish Balay     baij->rowvalues = 0;
919606d414cSSatish Balay   }
920bbb85fb3SSatish Balay   PetscFunctionReturn(0);
921bbb85fb3SSatish Balay }
92257b952d6SSatish Balay 
923225e174dSSatish Balay /*
9245615d1e5SSatish Balay #undef __FUNC__
9255615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
92657b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
92757b952d6SSatish Balay {
92857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
92957b952d6SSatish Balay   int          ierr;
93057b952d6SSatish Balay 
931d64ed03dSBarry Smith   PetscFunctionBegin;
93257b952d6SSatish Balay   if (baij->size == 1) {
93357b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
934a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9353a40ed3dSBarry Smith   PetscFunctionReturn(0);
93657b952d6SSatish Balay }
937225e174dSSatish Balay */
93857b952d6SSatish Balay 
9395615d1e5SSatish Balay #undef __FUNC__
9407b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
9417b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
94257b952d6SSatish Balay {
94357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
94477ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
9456831982aSBarry Smith   PetscTruth   isascii,isdraw;
94657b952d6SSatish Balay 
947d64ed03dSBarry Smith   PetscFunctionBegin;
9486831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
9496831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
9500f5bd95cSBarry Smith   if (isascii) {
951d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
952639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9534e220ebcSLois Curfman McInnes       MatInfo info;
954d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
955d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9566831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9574e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
9586831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
959d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
9606831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
961d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
9626831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
9636831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
96457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
9653a40ed3dSBarry Smith       PetscFunctionReturn(0);
966d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
9676831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
9683a40ed3dSBarry Smith       PetscFunctionReturn(0);
96957b952d6SSatish Balay     }
97057b952d6SSatish Balay   }
97157b952d6SSatish Balay 
9720f5bd95cSBarry Smith   if (isdraw) {
97357b952d6SSatish Balay     Draw       draw;
97457b952d6SSatish Balay     PetscTruth isnull;
97577ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
9763a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
97757b952d6SSatish Balay   }
97857b952d6SSatish Balay 
97957b952d6SSatish Balay   if (size == 1) {
98057b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
981d64ed03dSBarry Smith   } else {
98257b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
98357b952d6SSatish Balay     Mat         A;
98457b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
98540011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
98657b952d6SSatish Balay     int         mbs = baij->mbs;
98757b952d6SSatish Balay     Scalar      *a;
98857b952d6SSatish Balay 
98957b952d6SSatish Balay     if (!rank) {
99055843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
991d64ed03dSBarry Smith     } else {
99255843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
99357b952d6SSatish Balay     }
99457b952d6SSatish Balay     PLogObjectParent(mat,A);
99557b952d6SSatish Balay 
99657b952d6SSatish Balay     /* copy over the A part */
99757b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*) baij->A->data;
99857b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
99957b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
100057b952d6SSatish Balay 
100157b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
100257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
100357b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
100457b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
100557b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
100657b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1007cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1008cee3aa6bSSatish Balay           col++; a += bs;
100957b952d6SSatish Balay         }
101057b952d6SSatish Balay       }
101157b952d6SSatish Balay     }
101257b952d6SSatish Balay     /* copy over the B part */
101357b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
101457b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
101557b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
101657b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
101757b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
101857b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
101957b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
102057b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1021cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1022cee3aa6bSSatish Balay           col++; a += bs;
102357b952d6SSatish Balay         }
102457b952d6SSatish Balay       }
102557b952d6SSatish Balay     }
1026606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10276d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10286d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
102955843e3eSBarry Smith     /*
103055843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
103155843e3eSBarry Smith        synchronized across all processors that share the Draw object
103255843e3eSBarry Smith     */
10336831982aSBarry Smith     if (!rank) {
10346831982aSBarry Smith       Viewer sviewer;
10356831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
10366831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
10376831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
103857b952d6SSatish Balay     }
103957b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
104057b952d6SSatish Balay   }
10413a40ed3dSBarry Smith   PetscFunctionReturn(0);
104257b952d6SSatish Balay }
104357b952d6SSatish Balay 
104457b952d6SSatish Balay 
104557b952d6SSatish Balay 
10465615d1e5SSatish Balay #undef __FUNC__
10475615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1048e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
104957b952d6SSatish Balay {
105057b952d6SSatish Balay   int        ierr;
10516831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
105257b952d6SSatish Balay 
1053d64ed03dSBarry Smith   PetscFunctionBegin;
10546831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
10556831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10566831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
10576831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
10580f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
10597b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
10605cd90555SBarry Smith   } else {
10610f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
106257b952d6SSatish Balay   }
10633a40ed3dSBarry Smith   PetscFunctionReturn(0);
106457b952d6SSatish Balay }
106557b952d6SSatish Balay 
10665615d1e5SSatish Balay #undef __FUNC__
10675615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1068e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
106979bdfe76SSatish Balay {
107079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
107179bdfe76SSatish Balay   int         ierr;
107279bdfe76SSatish Balay 
1073d64ed03dSBarry Smith   PetscFunctionBegin;
107498dd23e9SBarry Smith 
107598dd23e9SBarry Smith   if (mat->mapping) {
107698dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
107798dd23e9SBarry Smith   }
107898dd23e9SBarry Smith   if (mat->bmapping) {
107998dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
108098dd23e9SBarry Smith   }
108161b13de0SBarry Smith   if (mat->rmap) {
108261b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
108361b13de0SBarry Smith   }
108461b13de0SBarry Smith   if (mat->cmap) {
108561b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
108661b13de0SBarry Smith   }
1087aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1088e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
108979bdfe76SSatish Balay #endif
109079bdfe76SSatish Balay 
10918798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
10928798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
10938798bf22SSatish Balay 
1094606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
109579bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
109679bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1097aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
10980f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
109948e59246SSatish Balay #else
1100606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
110148e59246SSatish Balay #endif
1102606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1103606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1104606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1105606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1106606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1107606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1108606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
110979bdfe76SSatish Balay   PLogObjectDestroy(mat);
111079bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11113a40ed3dSBarry Smith   PetscFunctionReturn(0);
111279bdfe76SSatish Balay }
111379bdfe76SSatish Balay 
11145615d1e5SSatish Balay #undef __FUNC__
11155615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1116ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1117cee3aa6bSSatish Balay {
1118cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
111947b4a8eaSLois Curfman McInnes   int         ierr, nt;
1120cee3aa6bSSatish Balay 
1121d64ed03dSBarry Smith   PetscFunctionBegin;
1122e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
112347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1124a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
112547b4a8eaSLois Curfman McInnes   }
1126e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
112747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1128a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
112947b4a8eaSLois Curfman McInnes   }
113043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1131f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
113243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1133f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
113443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1136cee3aa6bSSatish Balay }
1137cee3aa6bSSatish Balay 
11385615d1e5SSatish Balay #undef __FUNC__
11395615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1140ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141cee3aa6bSSatish Balay {
1142cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143cee3aa6bSSatish Balay   int        ierr;
1144d64ed03dSBarry Smith 
1145d64ed03dSBarry Smith   PetscFunctionBegin;
114643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1147f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
114843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1149f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
11503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1151cee3aa6bSSatish Balay }
1152cee3aa6bSSatish Balay 
11535615d1e5SSatish Balay #undef __FUNC__
11545615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1155ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1156cee3aa6bSSatish Balay {
1157cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1158cee3aa6bSSatish Balay   int         ierr;
1159cee3aa6bSSatish Balay 
1160d64ed03dSBarry Smith   PetscFunctionBegin;
1161cee3aa6bSSatish Balay   /* do nondiagonal part */
1162f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1163cee3aa6bSSatish Balay   /* send it on its way */
1164537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1165cee3aa6bSSatish Balay   /* do local part */
1166f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1167cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1168cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1169cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1170639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11713a40ed3dSBarry Smith   PetscFunctionReturn(0);
1172cee3aa6bSSatish Balay }
1173cee3aa6bSSatish Balay 
11745615d1e5SSatish Balay #undef __FUNC__
11755615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1176ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1177cee3aa6bSSatish Balay {
1178cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1179cee3aa6bSSatish Balay   int         ierr;
1180cee3aa6bSSatish Balay 
1181d64ed03dSBarry Smith   PetscFunctionBegin;
1182cee3aa6bSSatish Balay   /* do nondiagonal part */
1183f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1184cee3aa6bSSatish Balay   /* send it on its way */
1185537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1186cee3aa6bSSatish Balay   /* do local part */
1187f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1188cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1189cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1190cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1191537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1193cee3aa6bSSatish Balay }
1194cee3aa6bSSatish Balay 
1195cee3aa6bSSatish Balay /*
1196cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1197cee3aa6bSSatish Balay    diagonal block
1198cee3aa6bSSatish Balay */
11995615d1e5SSatish Balay #undef __FUNC__
12005615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1201ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1202cee3aa6bSSatish Balay {
1203cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
12043a40ed3dSBarry Smith   int         ierr;
1205d64ed03dSBarry Smith 
1206d64ed03dSBarry Smith   PetscFunctionBegin;
1207a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12083a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12093a40ed3dSBarry Smith   PetscFunctionReturn(0);
1210cee3aa6bSSatish Balay }
1211cee3aa6bSSatish Balay 
12125615d1e5SSatish Balay #undef __FUNC__
12135615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1214ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1215cee3aa6bSSatish Balay {
1216cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1217cee3aa6bSSatish Balay   int         ierr;
1218d64ed03dSBarry Smith 
1219d64ed03dSBarry Smith   PetscFunctionBegin;
1220cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1221cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
12223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1223cee3aa6bSSatish Balay }
1224026e39d0SSatish Balay 
12255615d1e5SSatish Balay #undef __FUNC__
12265615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1227ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
122857b952d6SSatish Balay {
122957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1230d64ed03dSBarry Smith 
1231d64ed03dSBarry Smith   PetscFunctionBegin;
1232bd7f49f5SSatish Balay   if (m) *m = mat->M;
1233bd7f49f5SSatish Balay   if (n) *n = mat->N;
12343a40ed3dSBarry Smith   PetscFunctionReturn(0);
123557b952d6SSatish Balay }
123657b952d6SSatish Balay 
12375615d1e5SSatish Balay #undef __FUNC__
12385615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1239ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
124057b952d6SSatish Balay {
124157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1242d64ed03dSBarry Smith 
1243d64ed03dSBarry Smith   PetscFunctionBegin;
1244f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12453a40ed3dSBarry Smith   PetscFunctionReturn(0);
124657b952d6SSatish Balay }
124757b952d6SSatish Balay 
12485615d1e5SSatish Balay #undef __FUNC__
12495615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1250ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
125157b952d6SSatish Balay {
125257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1253d64ed03dSBarry Smith 
1254d64ed03dSBarry Smith   PetscFunctionBegin;
125557b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
12563a40ed3dSBarry Smith   PetscFunctionReturn(0);
125757b952d6SSatish Balay }
125857b952d6SSatish Balay 
12595615d1e5SSatish Balay #undef __FUNC__
12605615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1261acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1262acdf5bf4SSatish Balay {
1263acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1264acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1265acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1266d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1267d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1268acdf5bf4SSatish Balay 
1269d64ed03dSBarry Smith   PetscFunctionBegin;
1270a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1271acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1272acdf5bf4SSatish Balay 
1273acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1274acdf5bf4SSatish Balay     /*
1275acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1276acdf5bf4SSatish Balay     */
1277acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1278bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1279bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1280acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1282acdf5bf4SSatish Balay     }
1283549d3d68SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1284acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1285acdf5bf4SSatish Balay   }
1286acdf5bf4SSatish Balay 
1287a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1288d9d09a02SSatish Balay   lrow = row - brstart;
1289acdf5bf4SSatish Balay 
1290acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1292acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295acdf5bf4SSatish Balay   nztot = nzA + nzB;
1296acdf5bf4SSatish Balay 
1297acdf5bf4SSatish Balay   cmap  = mat->garray;
1298acdf5bf4SSatish Balay   if (v  || idx) {
1299acdf5bf4SSatish Balay     if (nztot) {
1300acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1301acdf5bf4SSatish Balay       int imark = -1;
1302acdf5bf4SSatish Balay       if (v) {
1303acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1304acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1305d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1306acdf5bf4SSatish Balay           else break;
1307acdf5bf4SSatish Balay         }
1308acdf5bf4SSatish Balay         imark = i;
1309acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1310acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1311acdf5bf4SSatish Balay       }
1312acdf5bf4SSatish Balay       if (idx) {
1313acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1314acdf5bf4SSatish Balay         if (imark > -1) {
1315acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1316bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317acdf5bf4SSatish Balay           }
1318acdf5bf4SSatish Balay         } else {
1319acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1320d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1321d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1322acdf5bf4SSatish Balay             else break;
1323acdf5bf4SSatish Balay           }
1324acdf5bf4SSatish Balay           imark = i;
1325acdf5bf4SSatish Balay         }
1326d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1327d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1328acdf5bf4SSatish Balay       }
1329d64ed03dSBarry Smith     } else {
1330d212a18eSSatish Balay       if (idx) *idx = 0;
1331d212a18eSSatish Balay       if (v)   *v   = 0;
1332d212a18eSSatish Balay     }
1333acdf5bf4SSatish Balay   }
1334acdf5bf4SSatish Balay   *nz = nztot;
1335f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
13373a40ed3dSBarry Smith   PetscFunctionReturn(0);
1338acdf5bf4SSatish Balay }
1339acdf5bf4SSatish Balay 
13405615d1e5SSatish Balay #undef __FUNC__
13415615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1342acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1343acdf5bf4SSatish Balay {
1344acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1345d64ed03dSBarry Smith 
1346d64ed03dSBarry Smith   PetscFunctionBegin;
1347acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1348a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1349acdf5bf4SSatish Balay   }
1350acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13513a40ed3dSBarry Smith   PetscFunctionReturn(0);
1352acdf5bf4SSatish Balay }
1353acdf5bf4SSatish Balay 
13545615d1e5SSatish Balay #undef __FUNC__
13555615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1356ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13575a838052SSatish Balay {
13585a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1359d64ed03dSBarry Smith 
1360d64ed03dSBarry Smith   PetscFunctionBegin;
13615a838052SSatish Balay   *bs = baij->bs;
13623a40ed3dSBarry Smith   PetscFunctionReturn(0);
13635a838052SSatish Balay }
13645a838052SSatish Balay 
13655615d1e5SSatish Balay #undef __FUNC__
13665615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1367ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
136858667388SSatish Balay {
136958667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
137058667388SSatish Balay   int         ierr;
1371d64ed03dSBarry Smith 
1372d64ed03dSBarry Smith   PetscFunctionBegin;
137358667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
137458667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
13753a40ed3dSBarry Smith   PetscFunctionReturn(0);
137658667388SSatish Balay }
13770ac07820SSatish Balay 
13785615d1e5SSatish Balay #undef __FUNC__
13795615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1380ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13810ac07820SSatish Balay {
13824e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13834e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13847d57db60SLois Curfman McInnes   int         ierr;
13857d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13860ac07820SSatish Balay 
1387d64ed03dSBarry Smith   PetscFunctionBegin;
13884e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13894e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
13900e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1391de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
13924e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
13930e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1394de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
13950ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13964e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13974e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13984e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
13994e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14004e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14010ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1402f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14034e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14044e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14054e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14064e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14074e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14080ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1409f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14104e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14114e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14124e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14134e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14144e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1415d41123aaSBarry Smith   } else {
1416d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
14170ac07820SSatish Balay   }
14185a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14195a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14205a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14215a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14224e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14234e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14244e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14253a40ed3dSBarry Smith   PetscFunctionReturn(0);
14260ac07820SSatish Balay }
14270ac07820SSatish Balay 
14285615d1e5SSatish Balay #undef __FUNC__
14295615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1430ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
143158667388SSatish Balay {
143258667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
143398305bb5SBarry Smith   int         ierr;
143458667388SSatish Balay 
1435d64ed03dSBarry Smith   PetscFunctionBegin;
143658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
143758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14386da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1439c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
14404787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
14414787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
144298305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
144398305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1444b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1445aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
144698305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
144798305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1448b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14496da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
145058667388SSatish Balay              op == MAT_SYMMETRIC ||
145158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1452b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
145398305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
145458667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
145598305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
145658667388SSatish Balay     a->roworiented = 0;
145798305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
145898305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
14592b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
146090f02eecSBarry Smith     a->donotstash = 1;
1461d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1462d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1463133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1464133cdb44SSatish Balay     a->ht_flag = 1;
1465d64ed03dSBarry Smith   } else {
1466d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1467d64ed03dSBarry Smith   }
14683a40ed3dSBarry Smith   PetscFunctionReturn(0);
146958667388SSatish Balay }
147058667388SSatish Balay 
14715615d1e5SSatish Balay #undef __FUNC__
14725615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1473ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
14740ac07820SSatish Balay {
14750ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
14760ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14770ac07820SSatish Balay   Mat        B;
147840011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
14790ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
14800ac07820SSatish Balay   Scalar     *a;
14810ac07820SSatish Balay 
1482d64ed03dSBarry Smith   PetscFunctionBegin;
1483a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
148428937c7bSBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14850ac07820SSatish Balay CHKERRQ(ierr);
14860ac07820SSatish Balay 
14870ac07820SSatish Balay   /* copy over the A part */
14880ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14890ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14900ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
14910ac07820SSatish Balay 
14920ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14930ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14940ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14950ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14960ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14970ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14980ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14990ac07820SSatish Balay         col++; a += bs;
15000ac07820SSatish Balay       }
15010ac07820SSatish Balay     }
15020ac07820SSatish Balay   }
15030ac07820SSatish Balay   /* copy over the B part */
15040ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
15050ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15060ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15070ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15080ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15090ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15100ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15110ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15120ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15130ac07820SSatish Balay         col++; a += bs;
15140ac07820SSatish Balay       }
15150ac07820SSatish Balay     }
15160ac07820SSatish Balay   }
1517606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15180ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15190ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15200ac07820SSatish Balay 
15210ac07820SSatish Balay   if (matout != PETSC_NULL) {
15220ac07820SSatish Balay     *matout = B;
15230ac07820SSatish Balay   } else {
1524f830108cSBarry Smith     PetscOps *Abops;
1525cc2dc46cSBarry Smith     MatOps   Aops;
1526f830108cSBarry Smith 
15270ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1528606d414cSSatish Balay     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
15290ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
15300ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1531aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
15320f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1533b1fc9764SSatish Balay #else
1534606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1535b1fc9764SSatish Balay #endif
1536606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1537606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1538606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1539606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1540f830108cSBarry Smith 
1541f830108cSBarry Smith     /*
1542f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1543f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1544f830108cSBarry Smith       else from C.
1545f830108cSBarry Smith     */
1546f830108cSBarry Smith     Abops   = A->bops;
1547f830108cSBarry Smith     Aops    = A->ops;
1548549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1549f830108cSBarry Smith     A->bops = Abops;
1550f830108cSBarry Smith     A->ops  = Aops;
1551f830108cSBarry Smith 
15520ac07820SSatish Balay     PetscHeaderDestroy(B);
15530ac07820SSatish Balay   }
15543a40ed3dSBarry Smith   PetscFunctionReturn(0);
15550ac07820SSatish Balay }
15560e95ebc0SSatish Balay 
15575615d1e5SSatish Balay #undef __FUNC__
15585615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
155936c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15600e95ebc0SSatish Balay {
156136c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
156236c4a09eSSatish Balay   Mat         a = baij->A, b = baij->B;
15630e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
15640e95ebc0SSatish Balay 
1565d64ed03dSBarry Smith   PetscFunctionBegin;
156636c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
156736c4a09eSSatish Balay   if (rr) {
156836c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
156936c4a09eSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
157036c4a09eSSatish Balay     /* Overlap communication with computation. */
157136c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
157236c4a09eSSatish Balay   }
15730e95ebc0SSatish Balay   if (ll) {
15740e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
157536c4a09eSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
157636c4a09eSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
15770e95ebc0SSatish Balay   }
157836c4a09eSSatish Balay   /* scale  the diagonal block */
157936c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
158036c4a09eSSatish Balay 
158136c4a09eSSatish Balay   if (rr) {
158236c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
158336c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
158436c4a09eSSatish Balay     ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
158536c4a09eSSatish Balay   }
158636c4a09eSSatish Balay 
15873a40ed3dSBarry Smith   PetscFunctionReturn(0);
15880e95ebc0SSatish Balay }
15890e95ebc0SSatish Balay 
15905615d1e5SSatish Balay #undef __FUNC__
15915615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1592ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
15930ac07820SSatish Balay {
15940ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
15950ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1596a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
15970ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
15980ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1599a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16000ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16010ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16020ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16030ac07820SSatish Balay   IS             istmp;
16040ac07820SSatish Balay 
1605d64ed03dSBarry Smith   PetscFunctionBegin;
16060ac07820SSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
16070ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16080ac07820SSatish Balay 
16090ac07820SSatish Balay   /*  first count number of contributors to each processor */
16100ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1611549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1612549d3d68SSatish Balay   procs  = nprocs + size;
16130ac07820SSatish Balay   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
16140ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16150ac07820SSatish Balay     idx   = rows[i];
16160ac07820SSatish Balay     found = 0;
16170ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
16180ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16190ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16200ac07820SSatish Balay       }
16210ac07820SSatish Balay     }
1622a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16230ac07820SSatish Balay   }
16240ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
16250ac07820SSatish Balay 
16260ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16276831982aSBarry Smith   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
16286831982aSBarry Smith   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
16290ac07820SSatish Balay   nmax   = work[rank];
16306831982aSBarry Smith   nrecvs = work[size+rank];
1631606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
16320ac07820SSatish Balay 
16330ac07820SSatish Balay   /* post receives:   */
1634d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1635d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16360ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1637ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16380ac07820SSatish Balay   }
16390ac07820SSatish Balay 
16400ac07820SSatish Balay   /* do sends:
16410ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16420ac07820SSatish Balay      the ith processor
16430ac07820SSatish Balay   */
16440ac07820SSatish Balay   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1645ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16460ac07820SSatish Balay   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
16470ac07820SSatish Balay   starts[0]  = 0;
16480ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16490ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16500ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16510ac07820SSatish Balay   }
16526831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
16530ac07820SSatish Balay 
16540ac07820SSatish Balay   starts[0] = 0;
16550ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16560ac07820SSatish Balay   count = 0;
16570ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
16580ac07820SSatish Balay     if (procs[i]) {
1659ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
16600ac07820SSatish Balay     }
16610ac07820SSatish Balay   }
1662606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
16630ac07820SSatish Balay 
16640ac07820SSatish Balay   base = owners[rank]*bs;
16650ac07820SSatish Balay 
16660ac07820SSatish Balay   /*  wait on receives */
16670ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
16680ac07820SSatish Balay   source = lens + nrecvs;
16690ac07820SSatish Balay   count  = nrecvs; slen = 0;
16700ac07820SSatish Balay   while (count) {
1671ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
16720ac07820SSatish Balay     /* unpack receives into our local space */
1673ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
16740ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
16750ac07820SSatish Balay     lens[imdex]    = n;
16760ac07820SSatish Balay     slen          += n;
16770ac07820SSatish Balay     count--;
16780ac07820SSatish Balay   }
1679606d414cSSatish Balay   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
16800ac07820SSatish Balay 
16810ac07820SSatish Balay   /* move the data into the send scatter */
16820ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
16830ac07820SSatish Balay   count = 0;
16840ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
16850ac07820SSatish Balay     values = rvalues + i*nmax;
16860ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
16870ac07820SSatish Balay       lrows[count++] = values[j] - base;
16880ac07820SSatish Balay     }
16890ac07820SSatish Balay   }
1690606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1691606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1692606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1693606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
16940ac07820SSatish Balay 
16950ac07820SSatish Balay   /* actually zap the local rows */
1696029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
16970ac07820SSatish Balay   PLogObjectParent(A,istmp);
1698a07cd24cSSatish Balay 
169972dacd9aSBarry Smith   /*
170072dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
170172dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
170272dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
170372dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
170472dacd9aSBarry Smith 
170572dacd9aSBarry Smith        Contributed by: Mathew Knepley
170672dacd9aSBarry Smith   */
17079c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17080ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
17099c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17109c957beeSSatish Balay     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
17119c957beeSSatish Balay   } else if (diag) {
17129c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1713fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1714fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1715fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17166525c446SSatish Balay     }
1717a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1718a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1719a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1720a07cd24cSSatish Balay     }
1721a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17239c957beeSSatish Balay   } else {
17249c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1725a07cd24cSSatish Balay   }
17269c957beeSSatish Balay 
17279c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1728606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1729a07cd24cSSatish Balay 
17300ac07820SSatish Balay   /* wait on sends */
17310ac07820SSatish Balay   if (nsends) {
1732d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1733ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1734606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
17350ac07820SSatish Balay   }
1736606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1737606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
17380ac07820SSatish Balay 
17393a40ed3dSBarry Smith   PetscFunctionReturn(0);
17400ac07820SSatish Balay }
174172dacd9aSBarry Smith 
17425615d1e5SSatish Balay #undef __FUNC__
17435615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1744ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1745ba4ca20aSSatish Balay {
1746ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
174725fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1748133cdb44SSatish Balay   static int  called = 0;
17493a40ed3dSBarry Smith   int         ierr;
1750ba4ca20aSSatish Balay 
1751d64ed03dSBarry Smith   PetscFunctionBegin;
17523a40ed3dSBarry Smith   if (!a->rank) {
17533a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
175425fdafccSSatish Balay   }
175525fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1756d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1757d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
17583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1759ba4ca20aSSatish Balay }
17600ac07820SSatish Balay 
17615615d1e5SSatish Balay #undef __FUNC__
17625615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1763ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1764bb5a7306SBarry Smith {
1765bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1766bb5a7306SBarry Smith   int         ierr;
1767d64ed03dSBarry Smith 
1768d64ed03dSBarry Smith   PetscFunctionBegin;
1769bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
17703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1771bb5a7306SBarry Smith }
1772bb5a7306SBarry Smith 
17732e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
17740ac07820SSatish Balay 
17757fc3c18eSBarry Smith #undef __FUNC__
17767fc3c18eSBarry Smith #define __FUNC__ "MatEqual_MPIBAIJ"
17777fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
17787fc3c18eSBarry Smith {
17797fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
17807fc3c18eSBarry Smith   Mat         a, b, c, d;
17817fc3c18eSBarry Smith   PetscTruth  flg;
17827fc3c18eSBarry Smith   int         ierr;
17837fc3c18eSBarry Smith 
17847fc3c18eSBarry Smith   PetscFunctionBegin;
17857fc3c18eSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
17867fc3c18eSBarry Smith   a = matA->A; b = matA->B;
17877fc3c18eSBarry Smith   c = matB->A; d = matB->B;
17887fc3c18eSBarry Smith 
17897fc3c18eSBarry Smith   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
17907fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
17917fc3c18eSBarry Smith     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
17927fc3c18eSBarry Smith   }
17937fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
17947fc3c18eSBarry Smith   PetscFunctionReturn(0);
17957fc3c18eSBarry Smith }
17967fc3c18eSBarry Smith 
179779bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1798cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1799cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1800cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1801cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1802cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1803cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1804cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1805cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1806cc2dc46cSBarry Smith   0,
1807cc2dc46cSBarry Smith   0,
1808cc2dc46cSBarry Smith   0,
1809cc2dc46cSBarry Smith   0,
1810cc2dc46cSBarry Smith   0,
1811cc2dc46cSBarry Smith   0,
1812cc2dc46cSBarry Smith   0,
1813cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1814cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
18157fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1816cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1817cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1818cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1819cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1820cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1821cc2dc46cSBarry Smith   0,
1822cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1823cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1824cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1825cc2dc46cSBarry Smith   0,
1826cc2dc46cSBarry Smith   0,
1827cc2dc46cSBarry Smith   0,
1828cc2dc46cSBarry Smith   0,
1829cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1830cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1831cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1832cc2dc46cSBarry Smith   0,
1833cc2dc46cSBarry Smith   0,
1834cc2dc46cSBarry Smith   0,
1835cc2dc46cSBarry Smith   0,
18362e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1837cc2dc46cSBarry Smith   0,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   0,
1840cc2dc46cSBarry Smith   0,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1843cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1844cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1845cc2dc46cSBarry Smith   0,
1846cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1847cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1848cc2dc46cSBarry Smith   0,
1849cc2dc46cSBarry Smith   0,
1850cc2dc46cSBarry Smith   0,
1851cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1852cc2dc46cSBarry Smith   0,
1853cc2dc46cSBarry Smith   0,
1854cc2dc46cSBarry Smith   0,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1859cc2dc46cSBarry Smith   0,
1860cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1861cc2dc46cSBarry Smith   0,
1862cc2dc46cSBarry Smith   0,
1863cc2dc46cSBarry Smith   0,
1864cc2dc46cSBarry Smith   MatGetMaps_Petsc};
186579bdfe76SSatish Balay 
18665ef9f2a5SBarry Smith 
1867e18c124aSSatish Balay EXTERN_C_BEGIN
18685ef9f2a5SBarry Smith #undef __FUNC__
18695ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
18705ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
18715ef9f2a5SBarry Smith {
18725ef9f2a5SBarry Smith   PetscFunctionBegin;
18735ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
18745ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
18755ef9f2a5SBarry Smith   PetscFunctionReturn(0);
18765ef9f2a5SBarry Smith }
1877e18c124aSSatish Balay EXTERN_C_END
187879bdfe76SSatish Balay 
18795615d1e5SSatish Balay #undef __FUNC__
18805615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
188179bdfe76SSatish Balay /*@C
188279bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
188379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
188479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
188579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
188679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
188779bdfe76SSatish Balay 
1888db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1889db81eaa0SLois Curfman McInnes 
189079bdfe76SSatish Balay    Input Parameters:
1891db81eaa0SLois Curfman McInnes +  comm - MPI communicator
189279bdfe76SSatish Balay .  bs   - size of blockk
189379bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
189492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
189592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
189692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
189792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
189892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1899be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1900be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
190179bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
190279bdfe76SSatish Balay            submatrix  (same for all local rows)
19037fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
190492e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1905db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
190692e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
190779bdfe76SSatish Balay            submatrix (same for all local rows).
19087fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
190992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
191092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
191179bdfe76SSatish Balay 
191279bdfe76SSatish Balay    Output Parameter:
191379bdfe76SSatish Balay .  A - the matrix
191479bdfe76SSatish Balay 
1915db81eaa0SLois Curfman McInnes    Options Database Keys:
1916db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1917db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1918db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1919494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1920494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19213ffaccefSLois Curfman McInnes 
1922b259b22eSLois Curfman McInnes    Notes:
192379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
192479bdfe76SSatish Balay    (possibly both).
192579bdfe76SSatish Balay 
1926be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1927be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1928be79a94dSBarry Smith 
192979bdfe76SSatish Balay    Storage Information:
193079bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
193179bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
193279bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
193379bdfe76SSatish Balay    local matrix (a rectangular submatrix).
193479bdfe76SSatish Balay 
193579bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
193679bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
193779bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
193879bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
193979bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
194079bdfe76SSatish Balay 
194179bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
194279bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
194379bdfe76SSatish Balay 
1944db81eaa0SLois Curfman McInnes .vb
1945db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1946db81eaa0SLois Curfman McInnes           -------------------
1947db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1948db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1949db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1950db81eaa0SLois Curfman McInnes           -------------------
1951db81eaa0SLois Curfman McInnes .ve
195279bdfe76SSatish Balay 
195379bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
195479bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
195579bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
195657b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
195779bdfe76SSatish Balay 
1958d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1959d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
196079bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
196192e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
196292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19636da5968aSLois Curfman McInnes    matrices.
196479bdfe76SSatish Balay 
1965027ccd11SLois Curfman McInnes    Level: intermediate
1966027ccd11SLois Curfman McInnes 
196792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
196879bdfe76SSatish Balay 
1969db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
197079bdfe76SSatish Balay @*/
197179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
197279bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
197379bdfe76SSatish Balay {
197479bdfe76SSatish Balay   Mat          B;
197579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1976f1af5d2fSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
1977f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
197879bdfe76SSatish Balay 
1979d64ed03dSBarry Smith   PetscFunctionBegin;
1980962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1981962fb4a0SBarry Smith 
1982a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
198336c4a09eSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
198436c4a09eSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
19854fdb0a08SBarry Smith   if (d_nnz) {
198636c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
19874fdb0a08SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
19884fdb0a08SBarry Smith     }
19894fdb0a08SBarry Smith   }
19904fdb0a08SBarry Smith   if (o_nnz) {
199136c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
19924fdb0a08SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
19934fdb0a08SBarry Smith     }
19944fdb0a08SBarry Smith   }
19953914022bSBarry Smith 
1996d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1997494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
1998494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1999494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20003914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20013914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20023914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
20033a40ed3dSBarry Smith     PetscFunctionReturn(0);
20043914022bSBarry Smith   }
20053914022bSBarry Smith 
200679bdfe76SSatish Balay   *A = 0;
20073f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
200879bdfe76SSatish Balay   PLogObjectCreate(B);
200979bdfe76SSatish Balay   B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2010549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2011549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
20124c50302cSBarry Smith 
2013e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2014e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
201590f02eecSBarry Smith   B->mapping    = 0;
201679bdfe76SSatish Balay   B->factor     = 0;
201779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
201879bdfe76SSatish Balay 
2019e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2020d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2021d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
202279bdfe76SSatish Balay 
2023d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2024a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2025d64ed03dSBarry Smith   }
2026a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2027a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2028a8c6a408SBarry Smith   }
2029a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2030a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2031a8c6a408SBarry Smith   }
2032cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2033cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
203479bdfe76SSatish Balay 
203579bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
203679bdfe76SSatish Balay     work[0] = m; work[1] = n;
203779bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2038ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
203979bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
204079bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
204179bdfe76SSatish Balay   }
204279bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
204379bdfe76SSatish Balay     Mbs = M/bs;
2044a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
204579bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
204679bdfe76SSatish Balay     m   = mbs*bs;
204779bdfe76SSatish Balay   }
204879bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
204979bdfe76SSatish Balay     Nbs = N/bs;
2050a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
205179bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
205279bdfe76SSatish Balay     n   = nbs*bs;
205379bdfe76SSatish Balay   }
2054a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2055a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2056a8c6a408SBarry Smith   }
205779bdfe76SSatish Balay 
205879bdfe76SSatish Balay   b->m = m; B->m = m;
205979bdfe76SSatish Balay   b->n = n; B->n = n;
206079bdfe76SSatish Balay   b->N = N; B->N = N;
206179bdfe76SSatish Balay   b->M = M; B->M = M;
206279bdfe76SSatish Balay   b->bs  = bs;
206379bdfe76SSatish Balay   b->bs2 = bs*bs;
206479bdfe76SSatish Balay   b->mbs = mbs;
206579bdfe76SSatish Balay   b->nbs = nbs;
206679bdfe76SSatish Balay   b->Mbs = Mbs;
206779bdfe76SSatish Balay   b->Nbs = Nbs;
206879bdfe76SSatish Balay 
2069c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2070c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2071488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2072488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2073c7fcc2eaSBarry Smith 
207479bdfe76SSatish Balay   /* build local table of row and column ownerships */
2075ff2fd236SBarry Smith   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2076ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20770ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2078ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2079ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
208079bdfe76SSatish Balay   b->rowners[0]    = 0;
208179bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
208279bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
208379bdfe76SSatish Balay   }
2084ff2fd236SBarry Smith   for ( i=0; i<=b->size; i++ ) {
2085ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2086ff2fd236SBarry Smith   }
208779bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
208879bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
20894fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
20904fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
20914fa0d573SSatish Balay 
2092ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
209379bdfe76SSatish Balay   b->cowners[0] = 0;
209479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
209579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
209679bdfe76SSatish Balay   }
209779bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
209879bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
20994fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21004fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
210179bdfe76SSatish Balay 
2102a07cd24cSSatish Balay 
210379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2104029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
210579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
210679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2107029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
210879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
210979bdfe76SSatish Balay 
211079bdfe76SSatish Balay   /* build cache for off array entries formed */
21118798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
21128798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
211390f02eecSBarry Smith   b->donotstash  = 0;
211479bdfe76SSatish Balay   b->colmap      = 0;
211579bdfe76SSatish Balay   b->garray      = 0;
211679bdfe76SSatish Balay   b->roworiented = 1;
211779bdfe76SSatish Balay 
211830793edcSSatish Balay   /* stuff used in block assembly */
211930793edcSSatish Balay   b->barray       = 0;
212030793edcSSatish Balay 
212179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
212279bdfe76SSatish Balay   b->lvec         = 0;
212379bdfe76SSatish Balay   b->Mvctx        = 0;
212479bdfe76SSatish Balay 
212579bdfe76SSatish Balay   /* stuff for MatGetRow() */
212679bdfe76SSatish Balay   b->rowindices   = 0;
212779bdfe76SSatish Balay   b->rowvalues    = 0;
212879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
212979bdfe76SSatish Balay 
2130a07cd24cSSatish Balay   /* hash table stuff */
2131a07cd24cSSatish Balay   b->ht           = 0;
2132187ce0cbSSatish Balay   b->hd           = 0;
21330bdbc534SSatish Balay   b->ht_size      = 0;
2134133cdb44SSatish Balay   b->ht_flag      = 0;
213525fdafccSSatish Balay   b->ht_fact      = 0;
2136187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2137187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2138a07cd24cSSatish Balay 
213979bdfe76SSatish Balay   *A = B;
2140133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2141133cdb44SSatish Balay   if (flg) {
2142133cdb44SSatish Balay     double fact = 1.39;
2143133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2144f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2145133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2146133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2147133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2148133cdb44SSatish Balay   }
2149f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
21507fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
21517fc3c18eSBarry Smith                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2152f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
21537fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
21547fc3c18eSBarry Smith                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2155f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
21565ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
21575ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
21583a40ed3dSBarry Smith   PetscFunctionReturn(0);
215979bdfe76SSatish Balay }
2160026e39d0SSatish Balay 
21615615d1e5SSatish Balay #undef __FUNC__
21622e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
21632e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21640ac07820SSatish Balay {
21650ac07820SSatish Balay   Mat         mat;
21660ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2167f1af5d2fSBarry Smith   int         ierr, len=0;
2168f1af5d2fSBarry Smith   PetscTruth  flg;
21690ac07820SSatish Balay 
2170d64ed03dSBarry Smith   PetscFunctionBegin;
21710ac07820SSatish Balay   *newmat       = 0;
21723f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21730ac07820SSatish Balay   PLogObjectCreate(mat);
21740ac07820SSatish Balay   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2175549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2176e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIBAIJ;
2177e1311b90SBarry Smith   mat->ops->view    = MatView_MPIBAIJ;
21780ac07820SSatish Balay   mat->factor       = matin->factor;
21790ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2180aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
21810ac07820SSatish Balay 
21820ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21830ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21840ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21850ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21860ac07820SSatish Balay 
21870ac07820SSatish Balay   a->bs  = oldmat->bs;
21880ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21890ac07820SSatish Balay   a->mbs = oldmat->mbs;
21900ac07820SSatish Balay   a->nbs = oldmat->nbs;
21910ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21920ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21930ac07820SSatish Balay 
21940ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21950ac07820SSatish Balay   a->rend         = oldmat->rend;
21960ac07820SSatish Balay   a->cstart       = oldmat->cstart;
21970ac07820SSatish Balay   a->cend         = oldmat->cend;
21980ac07820SSatish Balay   a->size         = oldmat->size;
21990ac07820SSatish Balay   a->rank         = oldmat->rank;
2200aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2201aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2202aef5e8e0SSatish Balay   a->rowindices   = 0;
22030ac07820SSatish Balay   a->rowvalues    = 0;
22040ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
220530793edcSSatish Balay   a->barray       = 0;
22063123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
22073123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
22083123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
22093123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
22100ac07820SSatish Balay 
2211133cdb44SSatish Balay   /* hash table stuff */
2212133cdb44SSatish Balay   a->ht           = 0;
2213133cdb44SSatish Balay   a->hd           = 0;
2214133cdb44SSatish Balay   a->ht_size      = 0;
2215133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
221625fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2217133cdb44SSatish Balay   a->ht_total_ct  = 0;
2218133cdb44SSatish Balay   a->ht_insert_ct = 0;
2219133cdb44SSatish Balay 
2220133cdb44SSatish Balay 
2221ff2fd236SBarry Smith   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2222ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22230ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2224ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2225549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
22268798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
22278798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
22280ac07820SSatish Balay   if (oldmat->colmap) {
2229aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
22300f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
223148e59246SSatish Balay #else
22320ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22330ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2234549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
223548e59246SSatish Balay #endif
22360ac07820SSatish Balay   } else a->colmap = 0;
22370ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
22380ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
22390ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2240549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
22410ac07820SSatish Balay   } else a->garray = 0;
22420ac07820SSatish Balay 
22430ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
22440ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22450ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2246e18c124aSSatish Balay 
22470ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22482e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
22490ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22502e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
22510ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22520ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2253e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
22540ac07820SSatish Balay   if (flg) {
22550ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
22560ac07820SSatish Balay   }
22570ac07820SSatish Balay   *newmat = mat;
22583a40ed3dSBarry Smith   PetscFunctionReturn(0);
22590ac07820SSatish Balay }
226057b952d6SSatish Balay 
226157b952d6SSatish Balay #include "sys.h"
226257b952d6SSatish Balay 
22635615d1e5SSatish Balay #undef __FUNC__
22645615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
226557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
226657b952d6SSatish Balay {
226757b952d6SSatish Balay   Mat          A;
226857b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
226957b952d6SSatish Balay   Scalar       *vals,*buf;
227057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
227157b952d6SSatish Balay   MPI_Status   status;
2272cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
227357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2274f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
227557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
227657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
227757b952d6SSatish Balay 
2278d64ed03dSBarry Smith   PetscFunctionBegin;
2279f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
228057b952d6SSatish Balay 
2281d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2282d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
228357b952d6SSatish Balay   if (!rank) {
228457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2285e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2286a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2287d64ed03dSBarry Smith     if (header[3] < 0) {
2288a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2289d64ed03dSBarry Smith     }
22906c5fab8fSBarry Smith   }
2291d64ed03dSBarry Smith 
2292ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
229357b952d6SSatish Balay   M = header[1]; N = header[2];
229457b952d6SSatish Balay 
2295a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
229657b952d6SSatish Balay 
229757b952d6SSatish Balay   /*
229857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
229957b952d6SSatish Balay      divisible by the blocksize
230057b952d6SSatish Balay   */
230157b952d6SSatish Balay   Mbs        = M/bs;
230257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
230357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
230457b952d6SSatish Balay   else                  Mbs++;
230557b952d6SSatish Balay   if (extra_rows &&!rank) {
2306b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
230757b952d6SSatish Balay   }
2308537820f0SBarry Smith 
230957b952d6SSatish Balay   /* determine ownership of all rows */
231057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
231157b952d6SSatish Balay   m   = mbs * bs;
2312cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2313cee3aa6bSSatish Balay   browners = rowners + size + 1;
2314ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
231557b952d6SSatish Balay   rowners[0] = 0;
2316cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2317cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
231857b952d6SSatish Balay   rstart = rowners[rank];
231957b952d6SSatish Balay   rend   = rowners[rank+1];
232057b952d6SSatish Balay 
232157b952d6SSatish Balay   /* distribute row lengths to all processors */
232257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
232357b952d6SSatish Balay   if (!rank) {
232457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2325e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
232657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
232757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2328cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2329ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2330606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2331d64ed03dSBarry Smith   } else {
2332ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
233357b952d6SSatish Balay   }
233457b952d6SSatish Balay 
233557b952d6SSatish Balay   if (!rank) {
233657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
233757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2338549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
233957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
234057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
234157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
234257b952d6SSatish Balay       }
234357b952d6SSatish Balay     }
2344606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
234557b952d6SSatish Balay 
234657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
234757b952d6SSatish Balay     maxnz = 0;
234857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
234957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
235057b952d6SSatish Balay     }
235157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
235257b952d6SSatish Balay 
235357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
235457b952d6SSatish Balay     nz = procsnz[0];
235557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
235657b952d6SSatish Balay     mycols = ibuf;
2357cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2358e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2359cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2360cee3aa6bSSatish Balay 
236157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
236257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
236357b952d6SSatish Balay       nz   = procsnz[i];
2364e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2365ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
236657b952d6SSatish Balay     }
236757b952d6SSatish Balay     /* read in the stuff for the last proc */
236857b952d6SSatish Balay     if ( size != 1 ) {
236957b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2370e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
237157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2372ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
237357b952d6SSatish Balay     }
2374606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2375d64ed03dSBarry Smith   } else {
237657b952d6SSatish Balay     /* determine buffer space needed for message */
237757b952d6SSatish Balay     nz = 0;
237857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
237957b952d6SSatish Balay       nz += locrowlens[i];
238057b952d6SSatish Balay     }
238157b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
238257b952d6SSatish Balay     mycols = ibuf;
238357b952d6SSatish Balay     /* receive message of column indices*/
2384ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2385ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2386a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
238757b952d6SSatish Balay   }
238857b952d6SSatish Balay 
238957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2390cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2391cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
239257b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2393549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
239457b952d6SSatish Balay   masked1 = mask    + Mbs;
239557b952d6SSatish Balay   masked2 = masked1 + Mbs;
239657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
239757b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
239857b952d6SSatish Balay     dcount  = 0;
239957b952d6SSatish Balay     odcount = 0;
240057b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
240157b952d6SSatish Balay       kmax = locrowlens[rowcount];
240257b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
240357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
240457b952d6SSatish Balay         if (!mask[tmp]) {
240557b952d6SSatish Balay           mask[tmp] = 1;
240657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
240757b952d6SSatish Balay           else masked1[dcount++] = tmp;
240857b952d6SSatish Balay         }
240957b952d6SSatish Balay       }
241057b952d6SSatish Balay       rowcount++;
241157b952d6SSatish Balay     }
2412cee3aa6bSSatish Balay 
241357b952d6SSatish Balay     dlens[i]  = dcount;
241457b952d6SSatish Balay     odlens[i] = odcount;
2415cee3aa6bSSatish Balay 
241657b952d6SSatish Balay     /* zero out the mask elements we set */
241757b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
241857b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
241957b952d6SSatish Balay   }
2420cee3aa6bSSatish Balay 
242157b952d6SSatish Balay   /* create our matrix */
2422549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
242357b952d6SSatish Balay   A = *newmat;
24246d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
242557b952d6SSatish Balay 
242657b952d6SSatish Balay   if (!rank) {
242757b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
242857b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
242957b952d6SSatish Balay     nz = procsnz[0];
243057b952d6SSatish Balay     vals = buf;
2431cee3aa6bSSatish Balay     mycols = ibuf;
2432cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2433e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2434cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2435537820f0SBarry Smith 
243657b952d6SSatish Balay     /* insert into matrix */
243757b952d6SSatish Balay     jj      = rstart*bs;
243857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
243957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
244057b952d6SSatish Balay       mycols += locrowlens[i];
244157b952d6SSatish Balay       vals   += locrowlens[i];
244257b952d6SSatish Balay       jj++;
244357b952d6SSatish Balay     }
244457b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
244557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
244657b952d6SSatish Balay       nz   = procsnz[i];
244757b952d6SSatish Balay       vals = buf;
2448e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2449ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
245057b952d6SSatish Balay     }
245157b952d6SSatish Balay     /* the last proc */
245257b952d6SSatish Balay     if ( size != 1 ){
245357b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2454cee3aa6bSSatish Balay       vals = buf;
2455e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
245657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2457ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
245857b952d6SSatish Balay     }
2459606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2460d64ed03dSBarry Smith   } else {
246157b952d6SSatish Balay     /* receive numeric values */
246257b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
246357b952d6SSatish Balay 
246457b952d6SSatish Balay     /* receive message of values*/
246557b952d6SSatish Balay     vals   = buf;
2466cee3aa6bSSatish Balay     mycols = ibuf;
2467ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2468ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2469a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
247057b952d6SSatish Balay 
247157b952d6SSatish Balay     /* insert into matrix */
247257b952d6SSatish Balay     jj      = rstart*bs;
2473cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
247457b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
247557b952d6SSatish Balay       mycols += locrowlens[i];
247657b952d6SSatish Balay       vals   += locrowlens[i];
247757b952d6SSatish Balay       jj++;
247857b952d6SSatish Balay     }
247957b952d6SSatish Balay   }
2480606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2481606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2482606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2483606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2484606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2485606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
24866d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24876d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
24883a40ed3dSBarry Smith   PetscFunctionReturn(0);
248957b952d6SSatish Balay }
249057b952d6SSatish Balay 
2491133cdb44SSatish Balay #undef __FUNC__
2492133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2493133cdb44SSatish Balay /*@
2494133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2495133cdb44SSatish Balay 
2496133cdb44SSatish Balay    Input Parameters:
2497133cdb44SSatish Balay .  mat  - the matrix
2498133cdb44SSatish Balay .  fact - factor
2499133cdb44SSatish Balay 
2500fee21e36SBarry Smith    Collective on Mat
2501fee21e36SBarry Smith 
25028c890885SBarry Smith    Level: advanced
25038c890885SBarry Smith 
2504133cdb44SSatish Balay   Notes:
2505133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2506133cdb44SSatish Balay 
2507133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2508133cdb44SSatish Balay 
2509133cdb44SSatish Balay .seealso: MatSetOption()
2510133cdb44SSatish Balay @*/
2511133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2512133cdb44SSatish Balay {
251325fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2514133cdb44SSatish Balay 
2515133cdb44SSatish Balay   PetscFunctionBegin;
2516133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
251725fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2518133cdb44SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2519133cdb44SSatish Balay   }
2520133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2521133cdb44SSatish Balay   baij->ht_fact = fact;
2522133cdb44SSatish Balay   PetscFunctionReturn(0);
2523133cdb44SSatish Balay }
2524