xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 93fea6af079e4055664eecaca9855c2a79060c37)
1*93fea6afSBarry Smith /*$Id: mpibaij.c,v 1.189 2000/04/09 04:36:25 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
16*93fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
17*93fea6afSBarry Smith 
18*93fea6afSBarry Smith /*  UGLY, ugly, ugly
19*93fea6afSBarry Smith    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20*93fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21*93fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22*93fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
23*93fea6afSBarry Smith    into the single precision data structures.
24*93fea6afSBarry Smith */
25*93fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
26*93fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27*93fea6afSBarry Smith extern int MatSetValuesBlock_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28*93fea6afSBarry Smith extern int MatSetValuesBlock_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29*93fea6afSBarry Smith #else
30*93fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
31*93fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
32*93fea6afSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
33*93fea6afSBarry Smith #endif
343b2fbd54SBarry Smith 
357fc3c18eSBarry Smith EXTERN_C_BEGIN
367fc3c18eSBarry Smith #undef __FUNC__
37b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
387fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
397fc3c18eSBarry Smith {
407fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
417fc3c18eSBarry Smith   int         ierr;
427fc3c18eSBarry Smith 
437fc3c18eSBarry Smith   PetscFunctionBegin;
447fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
457fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
467fc3c18eSBarry Smith   PetscFunctionReturn(0);
477fc3c18eSBarry Smith }
487fc3c18eSBarry Smith EXTERN_C_END
497fc3c18eSBarry Smith 
507fc3c18eSBarry Smith EXTERN_C_BEGIN
517fc3c18eSBarry Smith #undef __FUNC__
52b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
537fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
547fc3c18eSBarry Smith {
557fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
567fc3c18eSBarry Smith   int         ierr;
577fc3c18eSBarry Smith 
587fc3c18eSBarry Smith   PetscFunctionBegin;
597fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
607fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
617fc3c18eSBarry Smith   PetscFunctionReturn(0);
627fc3c18eSBarry Smith }
637fc3c18eSBarry Smith EXTERN_C_END
647fc3c18eSBarry Smith 
65537820f0SBarry Smith /*
66537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
6757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
6857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
6957b952d6SSatish Balay    length of colmap equals the global matrix length.
7057b952d6SSatish Balay */
715615d1e5SSatish Balay #undef __FUNC__
72b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
7357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
7457b952d6SSatish Balay {
7557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
7657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
77dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
7857b952d6SSatish Balay 
79d64ed03dSBarry Smith   PetscFunctionBegin;
80aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
810f5bd95cSBarry Smith   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
8248e59246SSatish Balay   for (i=0; i<nbs; i++){
830f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
8448e59246SSatish Balay   }
8548e59246SSatish Balay #else
86758f045eSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
8757b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
88549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
89928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9048e59246SSatish Balay #endif
913a40ed3dSBarry Smith   PetscFunctionReturn(0);
9257b952d6SSatish Balay }
9357b952d6SSatish Balay 
9480c1aa95SSatish Balay #define CHUNKSIZE  10
9580c1aa95SSatish Balay 
96f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
9780c1aa95SSatish Balay { \
9880c1aa95SSatish Balay  \
9980c1aa95SSatish Balay     brow = row/bs;  \
10080c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
101ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
10280c1aa95SSatish Balay       bcol = col/bs; \
10380c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
104ab26458aSBarry Smith       low = 0; high = nrow; \
105ab26458aSBarry Smith       while (high-low > 3) { \
106ab26458aSBarry Smith         t = (low+high)/2; \
107ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
108ab26458aSBarry Smith         else              low  = t; \
109ab26458aSBarry Smith       } \
110ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
11180c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
11280c1aa95SSatish Balay         if (rp[_i] == bcol) { \
11380c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
114eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
115eada6651SSatish Balay           else                    *bap  = value;  \
116ac7a638eSSatish Balay           goto a_noinsert; \
11780c1aa95SSatish Balay         } \
11880c1aa95SSatish Balay       } \
11989280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
120a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
12180c1aa95SSatish Balay       if (nrow >= rmax) { \
12280c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
12380c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1243eda8832SBarry Smith         MatScalar *new_a; \
12580c1aa95SSatish Balay  \
126a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
12789280ab3SLois Curfman McInnes  \
12880c1aa95SSatish Balay         /* malloc new storage space */ \
1293eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
1303eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
13180c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
13280c1aa95SSatish Balay         new_i   = new_j + new_nz; \
13380c1aa95SSatish Balay  \
13480c1aa95SSatish Balay         /* copy over old data into new slots */ \
13580c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
13680c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
137549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
13880c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1393eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1403eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
142549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1433eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
14480c1aa95SSatish Balay         /* free up old matrix storage */ \
145606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
146606d414cSSatish Balay         if (!a->singlemalloc) { \
147606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
148606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
149606d414cSSatish Balay         } \
15080c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1517c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
15280c1aa95SSatish Balay  \
15380c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
154ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
1553eda8832SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
15680c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
15780c1aa95SSatish Balay         a->reallocs++; \
15880c1aa95SSatish Balay         a->nz++; \
15980c1aa95SSatish Balay       } \
16080c1aa95SSatish Balay       N = nrow++ - 1;  \
16180c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
16280c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
16380c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1643eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
16580c1aa95SSatish Balay       } \
1663eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
16780c1aa95SSatish Balay       rp[_i]                      = bcol;  \
16880c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
169ac7a638eSSatish Balay       a_noinsert:; \
17080c1aa95SSatish Balay     ailen[brow] = nrow; \
17180c1aa95SSatish Balay }
17257b952d6SSatish Balay 
173ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
174ac7a638eSSatish Balay { \
175ac7a638eSSatish Balay     brow = row/bs;  \
176ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
177ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
178ac7a638eSSatish Balay       bcol = col/bs; \
179ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
180ac7a638eSSatish Balay       low = 0; high = nrow; \
181ac7a638eSSatish Balay       while (high-low > 3) { \
182ac7a638eSSatish Balay         t = (low+high)/2; \
183ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
184ac7a638eSSatish Balay         else              low  = t; \
185ac7a638eSSatish Balay       } \
186ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
187ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
188ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
189ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
190ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
191ac7a638eSSatish Balay           else                    *bap  = value;  \
192ac7a638eSSatish Balay           goto b_noinsert; \
193ac7a638eSSatish Balay         } \
194ac7a638eSSatish Balay       } \
19589280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
196a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
197ac7a638eSSatish Balay       if (nrow >= rmax) { \
198ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
19974c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2003eda8832SBarry Smith         MatScalar *new_a; \
201ac7a638eSSatish Balay  \
202a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
20389280ab3SLois Curfman McInnes  \
204ac7a638eSSatish Balay         /* malloc new storage space */ \
2053eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
2063eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
207ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
208ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
209ac7a638eSSatish Balay  \
210ac7a638eSSatish Balay         /* copy over old data into new slots */ \
211ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
21274c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
213549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
214ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2153eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2163eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2173eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
218549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2193eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
220ac7a638eSSatish Balay         /* free up old matrix storage */ \
221606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
222606d414cSSatish Balay         if (!b->singlemalloc) { \
223606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
224606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
225606d414cSSatish Balay         } \
22674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2277c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
228ac7a638eSSatish Balay  \
229ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
230ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
2313eda8832SBarry Smith         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
23274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
23374c639caSSatish Balay         b->reallocs++; \
23474c639caSSatish Balay         b->nz++; \
235ac7a638eSSatish Balay       } \
236ac7a638eSSatish Balay       N = nrow++ - 1;  \
237ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
238ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
239ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2403eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
241ac7a638eSSatish Balay       } \
2423eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
243ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
244ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
245ac7a638eSSatish Balay       b_noinsert:; \
246ac7a638eSSatish Balay     bilen[brow] = nrow; \
247ac7a638eSSatish Balay }
248ac7a638eSSatish Balay 
249*93fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2505615d1e5SSatish Balay #undef __FUNC__
251b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
252ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
25357b952d6SSatish Balay {
254*93fea6afSBarry Smith   int       ierr,i,N = m*n;
255*93fea6afSBarry Smith   MatScalar *vsingle = (MatScalar*)v;
256*93fea6afSBarry Smith 
257*93fea6afSBarry Smith   PetscFunctionBegin;
258*93fea6afSBarry Smith   for (i=0; i<N; i++) {
259*93fea6afSBarry Smith     vsingle[i] = v[i];
260*93fea6afSBarry Smith   }
261*93fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
262*93fea6afSBarry Smith   PetscFunctionReturn(0);
263*93fea6afSBarry Smith }
264*93fea6afSBarry Smith 
265*93fea6afSBarry Smith #undef __FUNC__
266*93fea6afSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
267*93fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
268*93fea6afSBarry Smith {
269*93fea6afSBarry Smith   int       ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
270*93fea6afSBarry Smith   MatScalar *vsingle = (MatScalar*)v;
271*93fea6afSBarry Smith 
272*93fea6afSBarry Smith   PetscFunctionBegin;
273*93fea6afSBarry Smith   for (i=0; i<N; i++) {
274*93fea6afSBarry Smith     vsingle[i] = v[i];
275*93fea6afSBarry Smith   }
276*93fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277*93fea6afSBarry Smith   PetscFunctionReturn(0);
278*93fea6afSBarry Smith }
279*93fea6afSBarry Smith #endif
280*93fea6afSBarry Smith 
281*93fea6afSBarry Smith #undef __FUNC__
282*93fea6afSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
283*93fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
284*93fea6afSBarry Smith {
28557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
286*93fea6afSBarry Smith   MatScalar   value;
2874fa0d573SSatish Balay   int         ierr,i,j,row,col;
2884fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2894fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2904fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
29157b952d6SSatish Balay 
292eada6651SSatish Balay   /* Some Variables required in the macro */
29380c1aa95SSatish Balay   Mat         A = baij->A;
29480c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
295ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
2963eda8832SBarry Smith   MatScalar   *aa=a->a;
297ac7a638eSSatish Balay 
298ac7a638eSSatish Balay   Mat         B = baij->B;
299ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
300ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3013eda8832SBarry Smith   MatScalar   *ba=b->a;
302ac7a638eSSatish Balay 
303ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
304ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3053eda8832SBarry Smith   MatScalar   *ap,*bap;
30680c1aa95SSatish Balay 
307d64ed03dSBarry Smith   PetscFunctionBegin;
30857b952d6SSatish Balay   for (i=0; i<m; i++) {
3095ef9f2a5SBarry Smith     if (im[i] < 0) continue;
310aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
311a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
312639f9d9dSBarry Smith #endif
31357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
31457b952d6SSatish Balay       row = im[i] - rstart_orig;
31557b952d6SSatish Balay       for (j=0; j<n; j++) {
31657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
31757b952d6SSatish Balay           col = in[j] - cstart_orig;
31857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
319f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
32080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
32173959e64SBarry Smith         } else if (in[j] < 0) continue;
322aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
323a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
324639f9d9dSBarry Smith #endif
32557b952d6SSatish Balay         else {
32657b952d6SSatish Balay           if (mat->was_assembled) {
327905e6a2fSBarry Smith             if (!baij->colmap) {
328905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
329905e6a2fSBarry Smith             }
330aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3310f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
332fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
33348e59246SSatish Balay #else
334905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
33548e59246SSatish Balay #endif
33657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
33757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
33857b952d6SSatish Balay               col =  in[j];
3399bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
3409bf004c3SSatish Balay               B = baij->B;
3419bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
3429bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
3439bf004c3SSatish Balay               ba=b->a;
34457b952d6SSatish Balay             }
345d64ed03dSBarry Smith           } else col = in[j];
34657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
347ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
348ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
34957b952d6SSatish Balay         }
35057b952d6SSatish Balay       }
351d64ed03dSBarry Smith     } else {
35290f02eecSBarry Smith       if (!baij->donotstash) {
353ff2fd236SBarry Smith         if (roworiented) {
3548798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
355ff2fd236SBarry Smith         } else {
3568798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
35757b952d6SSatish Balay         }
35857b952d6SSatish Balay       }
35957b952d6SSatish Balay     }
36090f02eecSBarry Smith   }
3613a40ed3dSBarry Smith   PetscFunctionReturn(0);
36257b952d6SSatish Balay }
36357b952d6SSatish Balay 
364ab26458aSBarry Smith #undef __FUNC__
365b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
366*93fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
367ab26458aSBarry Smith {
368ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
369*93fea6afSBarry Smith   MatScalar   *value,*barray=baij->barray;
3707ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
371ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
372ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
373ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
374ab26458aSBarry Smith 
375b16ae2b1SBarry Smith   PetscFunctionBegin;
37630793edcSSatish Balay   if(!barray) {
377*93fea6afSBarry Smith     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
37830793edcSSatish Balay   }
37930793edcSSatish Balay 
380ab26458aSBarry Smith   if (roworiented) {
381ab26458aSBarry Smith     stepval = (n-1)*bs;
382ab26458aSBarry Smith   } else {
383ab26458aSBarry Smith     stepval = (m-1)*bs;
384ab26458aSBarry Smith   }
385ab26458aSBarry Smith   for (i=0; i<m; i++) {
3865ef9f2a5SBarry Smith     if (im[i] < 0) continue;
387aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3885ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
389ab26458aSBarry Smith #endif
390ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
391ab26458aSBarry Smith       row = im[i] - rstart;
392ab26458aSBarry Smith       for (j=0; j<n; j++) {
39315b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
39415b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
39515b57d14SSatish Balay           barray = v + i*bs2;
39615b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
39715b57d14SSatish Balay           barray = v + j*bs2;
39815b57d14SSatish Balay         } else { /* Here a copy is required */
399ab26458aSBarry Smith           if (roworiented) {
400ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
401ab26458aSBarry Smith           } else {
402ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
403abef11f7SSatish Balay           }
40447513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
40547513183SBarry Smith             for (jj=0; jj<bs; jj++) {
40630793edcSSatish Balay               *barray++  = *value++;
40747513183SBarry Smith             }
40847513183SBarry Smith           }
40930793edcSSatish Balay           barray -=bs2;
41015b57d14SSatish Balay         }
411abef11f7SSatish Balay 
412abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
413abef11f7SSatish Balay           col  = in[j] - cstart;
414*93fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
415ab26458aSBarry Smith         }
4165ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
417aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4185ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
419ab26458aSBarry Smith #endif
420ab26458aSBarry Smith         else {
421ab26458aSBarry Smith           if (mat->was_assembled) {
422ab26458aSBarry Smith             if (!baij->colmap) {
423ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
424ab26458aSBarry Smith             }
425a5eb4965SSatish Balay 
426aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
427aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
428fa46199cSSatish Balay             { int data;
4290f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4300f5bd95cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
431fa46199cSSatish Balay             }
43248e59246SSatish Balay #else
4330f5bd95cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
434a5eb4965SSatish Balay #endif
43548e59246SSatish Balay #endif
436aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4370f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
438fa46199cSSatish Balay             col  = (col - 1)/bs;
43948e59246SSatish Balay #else
440a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
44148e59246SSatish Balay #endif
442ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
443ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
444ab26458aSBarry Smith               col =  in[j];
445ab26458aSBarry Smith             }
446ab26458aSBarry Smith           }
447ab26458aSBarry Smith           else col = in[j];
448*93fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
449ab26458aSBarry Smith         }
450ab26458aSBarry Smith       }
451d64ed03dSBarry Smith     } else {
452ab26458aSBarry Smith       if (!baij->donotstash) {
453ff2fd236SBarry Smith         if (roworiented) {
4548798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
455ff2fd236SBarry Smith         } else {
4568798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
457ff2fd236SBarry Smith         }
458abef11f7SSatish Balay       }
459ab26458aSBarry Smith     }
460ab26458aSBarry Smith   }
4613a40ed3dSBarry Smith   PetscFunctionReturn(0);
462ab26458aSBarry Smith }
4630bdbc534SSatish Balay #define HASH_KEY 0.6180339887
464c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
465c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
466c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4675615d1e5SSatish Balay #undef __FUNC__
468b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
4690bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4700bdbc534SSatish Balay {
4710bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
4720bdbc534SSatish Balay   int         ierr,i,j,row,col;
4730bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
474c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
475c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
476329f5518SBarry Smith   PetscReal   tmp;
4773eda8832SBarry Smith   MatScalar   ** HD = baij->hd,value;
478aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4794a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4804a15367fSSatish Balay #endif
4810bdbc534SSatish Balay 
4820bdbc534SSatish Balay   PetscFunctionBegin;
4830bdbc534SSatish Balay 
4840bdbc534SSatish Balay   for (i=0; i<m; i++) {
485aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4860bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4870bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4880bdbc534SSatish Balay #endif
4890bdbc534SSatish Balay       row = im[i];
490c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4910bdbc534SSatish Balay       for (j=0; j<n; j++) {
4920bdbc534SSatish Balay         col = in[j];
4933eda8832SBarry Smith         if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
4940bdbc534SSatish Balay         /* Look up into the Hash Table */
495c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
496c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4970bdbc534SSatish Balay 
498c2760754SSatish Balay 
499c2760754SSatish Balay         idx = h1;
500aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
501187ce0cbSSatish Balay         insert_ct++;
502187ce0cbSSatish Balay         total_ct++;
503187ce0cbSSatish Balay         if (HT[idx] != key) {
504187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505187ce0cbSSatish Balay           if (idx == size) {
506187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507187ce0cbSSatish Balay             if (idx == h1) {
508187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
509187ce0cbSSatish Balay             }
510187ce0cbSSatish Balay           }
511187ce0cbSSatish Balay         }
512187ce0cbSSatish Balay #else
513c2760754SSatish Balay         if (HT[idx] != key) {
514c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
515c2760754SSatish Balay           if (idx == size) {
516c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
517c2760754SSatish Balay             if (idx == h1) {
518c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
519c2760754SSatish Balay             }
520c2760754SSatish Balay           }
521c2760754SSatish Balay         }
522187ce0cbSSatish Balay #endif
523c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
524c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
525c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5260bdbc534SSatish Balay       }
5270bdbc534SSatish Balay     } else {
5280bdbc534SSatish Balay       if (!baij->donotstash) {
529ff2fd236SBarry Smith         if (roworiented) {
5308798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
531ff2fd236SBarry Smith         } else {
5328798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5330bdbc534SSatish Balay         }
5340bdbc534SSatish Balay       }
5350bdbc534SSatish Balay     }
5360bdbc534SSatish Balay   }
537aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
538187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
539187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
540187ce0cbSSatish Balay #endif
5410bdbc534SSatish Balay   PetscFunctionReturn(0);
5420bdbc534SSatish Balay }
5430bdbc534SSatish Balay 
5440bdbc534SSatish Balay #undef __FUNC__
545b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
5460bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5470bdbc534SSatish Balay {
5480bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
5498798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
5500bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
551b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
552c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
553329f5518SBarry Smith   PetscReal   tmp;
5543eda8832SBarry Smith   MatScalar   **HD = baij->hd,*baij_a;
5553eda8832SBarry Smith   Scalar      *v_t,*value;
556aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5574a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5584a15367fSSatish Balay #endif
5590bdbc534SSatish Balay 
560d0a41580SSatish Balay   PetscFunctionBegin;
561d0a41580SSatish Balay 
5620bdbc534SSatish Balay   if (roworiented) {
5630bdbc534SSatish Balay     stepval = (n-1)*bs;
5640bdbc534SSatish Balay   } else {
5650bdbc534SSatish Balay     stepval = (m-1)*bs;
5660bdbc534SSatish Balay   }
5670bdbc534SSatish Balay   for (i=0; i<m; i++) {
568aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5690bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5700bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5710bdbc534SSatish Balay #endif
5720bdbc534SSatish Balay     row   = im[i];
573187ce0cbSSatish Balay     v_t   = v + i*bs2;
574c2760754SSatish Balay     if (row >= rstart && row < rend) {
5750bdbc534SSatish Balay       for (j=0; j<n; j++) {
5760bdbc534SSatish Balay         col = in[j];
5770bdbc534SSatish Balay 
5780bdbc534SSatish Balay         /* Look up into the Hash Table */
579c2760754SSatish Balay         key = row*Nbs+col+1;
580c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5810bdbc534SSatish Balay 
582c2760754SSatish Balay         idx = h1;
583aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
584187ce0cbSSatish Balay         total_ct++;
585187ce0cbSSatish Balay         insert_ct++;
586187ce0cbSSatish Balay        if (HT[idx] != key) {
587187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
588187ce0cbSSatish Balay           if (idx == size) {
589187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
590187ce0cbSSatish Balay             if (idx == h1) {
591187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
592187ce0cbSSatish Balay             }
593187ce0cbSSatish Balay           }
594187ce0cbSSatish Balay         }
595187ce0cbSSatish Balay #else
596c2760754SSatish Balay         if (HT[idx] != key) {
597c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
598c2760754SSatish Balay           if (idx == size) {
599c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
600c2760754SSatish Balay             if (idx == h1) {
601c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
602c2760754SSatish Balay             }
603c2760754SSatish Balay           }
604c2760754SSatish Balay         }
605187ce0cbSSatish Balay #endif
606c2760754SSatish Balay         baij_a = HD[idx];
6070bdbc534SSatish Balay         if (roworiented) {
608c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
609187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
610187ce0cbSSatish Balay           value = v_t;
611187ce0cbSSatish Balay           v_t  += bs;
612fef45726SSatish Balay           if (addv == ADD_VALUES) {
613c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
614c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
615fef45726SSatish Balay                 baij_a[jj]  += *value++;
616b4cc0f5aSSatish Balay               }
617b4cc0f5aSSatish Balay             }
618fef45726SSatish Balay           } else {
619c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
620c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
621fef45726SSatish Balay                 baij_a[jj]  = *value++;
622fef45726SSatish Balay               }
623fef45726SSatish Balay             }
624fef45726SSatish Balay           }
6250bdbc534SSatish Balay         } else {
6260bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
627fef45726SSatish Balay           if (addv == ADD_VALUES) {
628b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
6290bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
630fef45726SSatish Balay                 baij_a[jj]  += *value++;
631fef45726SSatish Balay               }
632fef45726SSatish Balay             }
633fef45726SSatish Balay           } else {
634fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
635fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
636fef45726SSatish Balay                 baij_a[jj]  = *value++;
637fef45726SSatish Balay               }
638b4cc0f5aSSatish Balay             }
6390bdbc534SSatish Balay           }
6400bdbc534SSatish Balay         }
6410bdbc534SSatish Balay       }
6420bdbc534SSatish Balay     } else {
6430bdbc534SSatish Balay       if (!baij->donotstash) {
6440bdbc534SSatish Balay         if (roworiented) {
6458798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6460bdbc534SSatish Balay         } else {
6478798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6480bdbc534SSatish Balay         }
6490bdbc534SSatish Balay       }
6500bdbc534SSatish Balay     }
6510bdbc534SSatish Balay   }
652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
653187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
654187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
655187ce0cbSSatish Balay #endif
6560bdbc534SSatish Balay   PetscFunctionReturn(0);
6570bdbc534SSatish Balay }
658133cdb44SSatish Balay 
6590bdbc534SSatish Balay #undef __FUNC__
660b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
661ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
662d6de1c52SSatish Balay {
663d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
664d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
66548e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
666d6de1c52SSatish Balay 
667133cdb44SSatish Balay   PetscFunctionBegin;
668d6de1c52SSatish Balay   for (i=0; i<m; i++) {
669a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
670a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
672d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
673d6de1c52SSatish Balay       for (j=0; j<n; j++) {
674a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
675a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
676d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
677d6de1c52SSatish Balay           col = idxn[j] - bscstart;
67898dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
679d64ed03dSBarry Smith         } else {
680905e6a2fSBarry Smith           if (!baij->colmap) {
681905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
682905e6a2fSBarry Smith           }
683aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
6840f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
685fa46199cSSatish Balay           data --;
68648e59246SSatish Balay #else
68748e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
68848e59246SSatish Balay #endif
68948e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
690d9d09a02SSatish Balay           else {
69148e59246SSatish Balay             col  = data + idxn[j]%bs;
69298dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
693d6de1c52SSatish Balay           }
694d6de1c52SSatish Balay         }
695d6de1c52SSatish Balay       }
696d64ed03dSBarry Smith     } else {
697a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
698d6de1c52SSatish Balay     }
699d6de1c52SSatish Balay   }
7003a40ed3dSBarry Smith  PetscFunctionReturn(0);
701d6de1c52SSatish Balay }
702d6de1c52SSatish Balay 
7035615d1e5SSatish Balay #undef __FUNC__
704b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
705329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
706d6de1c52SSatish Balay {
707d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
708d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
709acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
710329f5518SBarry Smith   PetscReal  sum = 0.0;
7113eda8832SBarry Smith   MatScalar  *v;
712d6de1c52SSatish Balay 
713d64ed03dSBarry Smith   PetscFunctionBegin;
714d6de1c52SSatish Balay   if (baij->size == 1) {
715d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
716d6de1c52SSatish Balay   } else {
717d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
718d6de1c52SSatish Balay       v = amat->a;
719d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
720aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
721329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
722d6de1c52SSatish Balay #else
723d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
724d6de1c52SSatish Balay #endif
725d6de1c52SSatish Balay       }
726d6de1c52SSatish Balay       v = bmat->a;
727d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
728aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
729329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
730d6de1c52SSatish Balay #else
731d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
732d6de1c52SSatish Balay #endif
733d6de1c52SSatish Balay       }
734ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
735d6de1c52SSatish Balay       *norm = sqrt(*norm);
736d64ed03dSBarry Smith     } else {
737e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
738d6de1c52SSatish Balay     }
739d64ed03dSBarry Smith   }
7403a40ed3dSBarry Smith   PetscFunctionReturn(0);
741d6de1c52SSatish Balay }
74257b952d6SSatish Balay 
743bd7f49f5SSatish Balay 
744fef45726SSatish Balay /*
745fef45726SSatish Balay   Creates the hash table, and sets the table
746fef45726SSatish Balay   This table is created only once.
747fef45726SSatish Balay   If new entried need to be added to the matrix
748fef45726SSatish Balay   then the hash table has to be destroyed and
749fef45726SSatish Balay   recreated.
750fef45726SSatish Balay */
751fef45726SSatish Balay #undef __FUNC__
752b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
753329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
754596b8d2eSBarry Smith {
755596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
756596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
757596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
7580bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
759549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
760187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
761fef45726SSatish Balay   int         *HT,key;
7623eda8832SBarry Smith   MatScalar   **HD;
763329f5518SBarry Smith   PetscReal   tmp;
764aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
7654a15367fSSatish Balay   int         ct=0,max=0;
7664a15367fSSatish Balay #endif
767fef45726SSatish Balay 
768d64ed03dSBarry Smith   PetscFunctionBegin;
7690bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7700bdbc534SSatish Balay   size = baij->ht_size;
771fef45726SSatish Balay 
7720bdbc534SSatish Balay   if (baij->ht) {
7730bdbc534SSatish Balay     PetscFunctionReturn(0);
774596b8d2eSBarry Smith   }
7750bdbc534SSatish Balay 
776fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7773eda8832SBarry Smith   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
778b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
779b9e4cc15SSatish Balay   HD = baij->hd;
780a07cd24cSSatish Balay   HT = baij->ht;
781b9e4cc15SSatish Balay 
782b9e4cc15SSatish Balay 
783549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
7840bdbc534SSatish Balay 
785596b8d2eSBarry Smith 
786596b8d2eSBarry Smith   /* Loop Over A */
7870bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
788596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7890bdbc534SSatish Balay       row = i+rstart;
7900bdbc534SSatish Balay       col = aj[j]+cstart;
791596b8d2eSBarry Smith 
792187ce0cbSSatish Balay       key = row*Nbs + col + 1;
793c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7940bdbc534SSatish Balay       for (k=0; k<size; k++){
7950bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7960bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7970bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
798596b8d2eSBarry Smith           break;
799aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
800187ce0cbSSatish Balay         } else {
801187ce0cbSSatish Balay           ct++;
802187ce0cbSSatish Balay #endif
803596b8d2eSBarry Smith         }
804187ce0cbSSatish Balay       }
805aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
806187ce0cbSSatish Balay       if (k> max) max = k;
807187ce0cbSSatish Balay #endif
808596b8d2eSBarry Smith     }
809596b8d2eSBarry Smith   }
810596b8d2eSBarry Smith   /* Loop Over B */
8110bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
812596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8130bdbc534SSatish Balay       row = i+rstart;
8140bdbc534SSatish Balay       col = garray[bj[j]];
815187ce0cbSSatish Balay       key = row*Nbs + col + 1;
816c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8170bdbc534SSatish Balay       for (k=0; k<size; k++){
8180bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8190bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8200bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
821596b8d2eSBarry Smith           break;
822aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
823187ce0cbSSatish Balay         } else {
824187ce0cbSSatish Balay           ct++;
825187ce0cbSSatish Balay #endif
826596b8d2eSBarry Smith         }
827187ce0cbSSatish Balay       }
828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
829187ce0cbSSatish Balay       if (k> max) max = k;
830187ce0cbSSatish Balay #endif
831596b8d2eSBarry Smith     }
832596b8d2eSBarry Smith   }
833596b8d2eSBarry Smith 
834596b8d2eSBarry Smith   /* Print Summary */
835aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
836c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
837596b8d2eSBarry Smith     if (HT[i]) {j++;}
838c38d4ed2SBarry Smith   }
839329f5518SBarry Smith   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
840187ce0cbSSatish Balay #endif
8413a40ed3dSBarry Smith   PetscFunctionReturn(0);
842596b8d2eSBarry Smith }
84357b952d6SSatish Balay 
844bbb85fb3SSatish Balay #undef __FUNC__
845b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
846bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847bbb85fb3SSatish Balay {
848bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
850bbb85fb3SSatish Balay   InsertMode  addv;
851bbb85fb3SSatish Balay 
852bbb85fb3SSatish Balay   PetscFunctionBegin;
853bbb85fb3SSatish Balay   if (baij->donotstash) {
854bbb85fb3SSatish Balay     PetscFunctionReturn(0);
855bbb85fb3SSatish Balay   }
856bbb85fb3SSatish Balay 
857bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
858bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
859bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
860bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
861bbb85fb3SSatish Balay   }
862bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
863bbb85fb3SSatish Balay 
8648798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
8658798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
8668798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8675a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
8688798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8695a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
870bbb85fb3SSatish Balay   PetscFunctionReturn(0);
871bbb85fb3SSatish Balay }
872bbb85fb3SSatish Balay 
873bbb85fb3SSatish Balay #undef __FUNC__
874b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
875bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
876bbb85fb3SSatish Balay {
877bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
878a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
879a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
8807c922b88SBarry Smith   int         *row,*col,other_disassembled;
8817c922b88SBarry Smith   PetscTruth  r1,r2,r3;
8823eda8832SBarry Smith   MatScalar   *val;
883bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
884bbb85fb3SSatish Balay 
885bbb85fb3SSatish Balay   PetscFunctionBegin;
886bbb85fb3SSatish Balay   if (!baij->donotstash) {
887a2d1c673SSatish Balay     while (1) {
8888798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
889a2d1c673SSatish Balay       if (!flg) break;
890a2d1c673SSatish Balay 
891bbb85fb3SSatish Balay       for (i=0; i<n;) {
892bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
893bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
894bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
895bbb85fb3SSatish Balay         else       ncols = n-i;
896bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
897*93fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
898bbb85fb3SSatish Balay         i = j;
899bbb85fb3SSatish Balay       }
900bbb85fb3SSatish Balay     }
9018798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
902a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
903a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
904a2d1c673SSatish Balay        restore the original flags */
905a2d1c673SSatish Balay     r1 = baij->roworiented;
906a2d1c673SSatish Balay     r2 = a->roworiented;
907a2d1c673SSatish Balay     r3 = b->roworiented;
9087c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
9097c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
9107c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
911a2d1c673SSatish Balay     while (1) {
9128798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
913a2d1c673SSatish Balay       if (!flg) break;
914a2d1c673SSatish Balay 
915a2d1c673SSatish Balay       for (i=0; i<n;) {
916a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
917a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
918a2d1c673SSatish Balay         if (j < n) ncols = j-i;
919a2d1c673SSatish Balay         else       ncols = n-i;
920*93fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
921a2d1c673SSatish Balay         i = j;
922a2d1c673SSatish Balay       }
923a2d1c673SSatish Balay     }
9248798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
925a2d1c673SSatish Balay     baij->roworiented = r1;
926a2d1c673SSatish Balay     a->roworiented    = r2;
927a2d1c673SSatish Balay     b->roworiented    = r3;
928bbb85fb3SSatish Balay   }
929bbb85fb3SSatish Balay 
930bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932bbb85fb3SSatish Balay 
933bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
934bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
935bbb85fb3SSatish Balay   /*
936bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
937bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
938bbb85fb3SSatish Balay   */
939bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
940bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
941bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
942bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943bbb85fb3SSatish Balay     }
944bbb85fb3SSatish Balay   }
945bbb85fb3SSatish Balay 
946bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948bbb85fb3SSatish Balay   }
949bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951bbb85fb3SSatish Balay 
952aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
953bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954c38d4ed2SBarry Smith     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
955bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
956bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay #endif
959bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
962bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
963bbb85fb3SSatish Balay   }
964bbb85fb3SSatish Balay 
965606d414cSSatish Balay   if (baij->rowvalues) {
966606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
967606d414cSSatish Balay     baij->rowvalues = 0;
968606d414cSSatish Balay   }
969bbb85fb3SSatish Balay   PetscFunctionReturn(0);
970bbb85fb3SSatish Balay }
97157b952d6SSatish Balay 
9725615d1e5SSatish Balay #undef __FUNC__
973b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary"
97457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
97557b952d6SSatish Balay {
97657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
97757b952d6SSatish Balay   int          ierr;
97857b952d6SSatish Balay 
979d64ed03dSBarry Smith   PetscFunctionBegin;
98057b952d6SSatish Balay   if (baij->size == 1) {
98157b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
982a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9833a40ed3dSBarry Smith   PetscFunctionReturn(0);
98457b952d6SSatish Balay }
98557b952d6SSatish Balay 
9865615d1e5SSatish Balay #undef __FUNC__
987b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
9887b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
98957b952d6SSatish Balay {
99057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
99177ed5343SBarry Smith   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
9926831982aSBarry Smith   PetscTruth   isascii,isdraw;
99357b952d6SSatish Balay 
994d64ed03dSBarry Smith   PetscFunctionBegin;
9956831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
9966831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
9970f5bd95cSBarry Smith   if (isascii) {
998d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
999639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
10004e220ebcSLois Curfman McInnes       MatInfo info;
1001d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1002d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
10036831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
10044e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10056831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1006d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
10076831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1008d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
10096831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
10106831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
101157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10123a40ed3dSBarry Smith       PetscFunctionReturn(0);
1013d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
10146831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
10153a40ed3dSBarry Smith       PetscFunctionReturn(0);
101657b952d6SSatish Balay     }
101757b952d6SSatish Balay   }
101857b952d6SSatish Balay 
10190f5bd95cSBarry Smith   if (isdraw) {
102057b952d6SSatish Balay     Draw       draw;
102157b952d6SSatish Balay     PetscTruth isnull;
102277ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
10233a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
102457b952d6SSatish Balay   }
102557b952d6SSatish Balay 
102657b952d6SSatish Balay   if (size == 1) {
102757b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1028d64ed03dSBarry Smith   } else {
102957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
103057b952d6SSatish Balay     Mat         A;
103157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
10323eda8832SBarry Smith     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10333eda8832SBarry Smith     MatScalar   *a;
103457b952d6SSatish Balay 
103557b952d6SSatish Balay     if (!rank) {
103655843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1037d64ed03dSBarry Smith     } else {
103855843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
103957b952d6SSatish Balay     }
104057b952d6SSatish Balay     PLogObjectParent(mat,A);
104157b952d6SSatish Balay 
104257b952d6SSatish Balay     /* copy over the A part */
104357b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
104457b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
104557b952d6SSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
104657b952d6SSatish Balay 
104757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
104857b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
104957b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
105057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
105157b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
105257b952d6SSatish Balay         for (k=0; k<bs; k++) {
1053*93fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054cee3aa6bSSatish Balay           col++; a += bs;
105557b952d6SSatish Balay         }
105657b952d6SSatish Balay       }
105757b952d6SSatish Balay     }
105857b952d6SSatish Balay     /* copy over the B part */
105957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
106057b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
106157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
106257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
106357b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
106457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
106557b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
106657b952d6SSatish Balay         for (k=0; k<bs; k++) {
1067*93fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068cee3aa6bSSatish Balay           col++; a += bs;
106957b952d6SSatish Balay         }
107057b952d6SSatish Balay       }
107157b952d6SSatish Balay     }
1072606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10736d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10746d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
107555843e3eSBarry Smith     /*
107655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
107755843e3eSBarry Smith        synchronized across all processors that share the Draw object
107855843e3eSBarry Smith     */
10796831982aSBarry Smith     if (!rank) {
10806831982aSBarry Smith       Viewer sviewer;
10816831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
10826831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
10836831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
108457b952d6SSatish Balay     }
108557b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
108657b952d6SSatish Balay   }
10873a40ed3dSBarry Smith   PetscFunctionReturn(0);
108857b952d6SSatish Balay }
108957b952d6SSatish Balay 
109057b952d6SSatish Balay 
109157b952d6SSatish Balay 
10925615d1e5SSatish Balay #undef __FUNC__
1093b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1094e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
109557b952d6SSatish Balay {
109657b952d6SSatish Balay   int        ierr;
10976831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
109857b952d6SSatish Balay 
1099d64ed03dSBarry Smith   PetscFunctionBegin;
11006831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
11016831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
11026831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
11036831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
11040f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11057b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11065cd90555SBarry Smith   } else {
11070f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
110857b952d6SSatish Balay   }
11093a40ed3dSBarry Smith   PetscFunctionReturn(0);
111057b952d6SSatish Balay }
111157b952d6SSatish Balay 
11125615d1e5SSatish Balay #undef __FUNC__
1113b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1114e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
111579bdfe76SSatish Balay {
111679bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
111779bdfe76SSatish Balay   int         ierr;
111879bdfe76SSatish Balay 
1119d64ed03dSBarry Smith   PetscFunctionBegin;
112098dd23e9SBarry Smith 
112198dd23e9SBarry Smith   if (mat->mapping) {
112298dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
112398dd23e9SBarry Smith   }
112498dd23e9SBarry Smith   if (mat->bmapping) {
112598dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
112698dd23e9SBarry Smith   }
112761b13de0SBarry Smith   if (mat->rmap) {
112861b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
112961b13de0SBarry Smith   }
113061b13de0SBarry Smith   if (mat->cmap) {
113161b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
113261b13de0SBarry Smith   }
1133aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1134e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
113579bdfe76SSatish Balay #endif
113679bdfe76SSatish Balay 
11378798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
11388798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
11398798bf22SSatish Balay 
1140606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
114179bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
114279bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1143aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
11440f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
114548e59246SSatish Balay #else
1146606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
114748e59246SSatish Balay #endif
1148606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1149606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1150606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1151606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1152606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1153606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1154606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
115579bdfe76SSatish Balay   PLogObjectDestroy(mat);
115679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11573a40ed3dSBarry Smith   PetscFunctionReturn(0);
115879bdfe76SSatish Balay }
115979bdfe76SSatish Balay 
11605615d1e5SSatish Balay #undef __FUNC__
1161b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1162ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1163cee3aa6bSSatish Balay {
1164cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
116547b4a8eaSLois Curfman McInnes   int         ierr,nt;
1166cee3aa6bSSatish Balay 
1167d64ed03dSBarry Smith   PetscFunctionBegin;
1168e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
116947b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1170a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
117147b4a8eaSLois Curfman McInnes   }
1172e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
117347b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1174a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
117547b4a8eaSLois Curfman McInnes   }
117643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1177f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
117843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1179f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
118043a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11813a40ed3dSBarry Smith   PetscFunctionReturn(0);
1182cee3aa6bSSatish Balay }
1183cee3aa6bSSatish Balay 
11845615d1e5SSatish Balay #undef __FUNC__
1185b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1186ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1187cee3aa6bSSatish Balay {
1188cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1189cee3aa6bSSatish Balay   int        ierr;
1190d64ed03dSBarry Smith 
1191d64ed03dSBarry Smith   PetscFunctionBegin;
119243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1193f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
119443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1195f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
11963a40ed3dSBarry Smith   PetscFunctionReturn(0);
1197cee3aa6bSSatish Balay }
1198cee3aa6bSSatish Balay 
11995615d1e5SSatish Balay #undef __FUNC__
1200b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
12017c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1202cee3aa6bSSatish Balay {
1203cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1204cee3aa6bSSatish Balay   int         ierr;
1205cee3aa6bSSatish Balay 
1206d64ed03dSBarry Smith   PetscFunctionBegin;
1207cee3aa6bSSatish Balay   /* do nondiagonal part */
12087c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209cee3aa6bSSatish Balay   /* send it on its way */
1210537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1211cee3aa6bSSatish Balay   /* do local part */
12127c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1213cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1214cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1215cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1216639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1218cee3aa6bSSatish Balay }
1219cee3aa6bSSatish Balay 
12205615d1e5SSatish Balay #undef __FUNC__
1221b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
12227c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1223cee3aa6bSSatish Balay {
1224cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1225cee3aa6bSSatish Balay   int         ierr;
1226cee3aa6bSSatish Balay 
1227d64ed03dSBarry Smith   PetscFunctionBegin;
1228cee3aa6bSSatish Balay   /* do nondiagonal part */
12297c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1230cee3aa6bSSatish Balay   /* send it on its way */
1231537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1232cee3aa6bSSatish Balay   /* do local part */
12337c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1234cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1235cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1236cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1237537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12383a40ed3dSBarry Smith   PetscFunctionReturn(0);
1239cee3aa6bSSatish Balay }
1240cee3aa6bSSatish Balay 
1241cee3aa6bSSatish Balay /*
1242cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1243cee3aa6bSSatish Balay    diagonal block
1244cee3aa6bSSatish Balay */
12455615d1e5SSatish Balay #undef __FUNC__
1246b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1247ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1248cee3aa6bSSatish Balay {
1249cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
12503a40ed3dSBarry Smith   int         ierr;
1251d64ed03dSBarry Smith 
1252d64ed03dSBarry Smith   PetscFunctionBegin;
1253a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12543a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12553a40ed3dSBarry Smith   PetscFunctionReturn(0);
1256cee3aa6bSSatish Balay }
1257cee3aa6bSSatish Balay 
12585615d1e5SSatish Balay #undef __FUNC__
1259b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1260ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1261cee3aa6bSSatish Balay {
1262cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1263cee3aa6bSSatish Balay   int         ierr;
1264d64ed03dSBarry Smith 
1265d64ed03dSBarry Smith   PetscFunctionBegin;
1266cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1267cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
12683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1269cee3aa6bSSatish Balay }
1270026e39d0SSatish Balay 
12715615d1e5SSatish Balay #undef __FUNC__
1272b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1273ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
127457b952d6SSatish Balay {
127557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1276d64ed03dSBarry Smith 
1277d64ed03dSBarry Smith   PetscFunctionBegin;
1278bd7f49f5SSatish Balay   if (m) *m = mat->M;
1279bd7f49f5SSatish Balay   if (n) *n = mat->N;
12803a40ed3dSBarry Smith   PetscFunctionReturn(0);
128157b952d6SSatish Balay }
128257b952d6SSatish Balay 
12835615d1e5SSatish Balay #undef __FUNC__
1284b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1285ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
128657b952d6SSatish Balay {
128757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1288d64ed03dSBarry Smith 
1289d64ed03dSBarry Smith   PetscFunctionBegin;
1290f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12913a40ed3dSBarry Smith   PetscFunctionReturn(0);
129257b952d6SSatish Balay }
129357b952d6SSatish Balay 
12945615d1e5SSatish Balay #undef __FUNC__
1295b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1296ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
129757b952d6SSatish Balay {
129857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1299d64ed03dSBarry Smith 
1300d64ed03dSBarry Smith   PetscFunctionBegin;
1301a21fb8cbSBarry Smith   if (m) *m = mat->rstart*mat->bs;
1302a21fb8cbSBarry Smith   if (n) *n = mat->rend*mat->bs;
13033a40ed3dSBarry Smith   PetscFunctionReturn(0);
130457b952d6SSatish Balay }
130557b952d6SSatish Balay 
13065615d1e5SSatish Balay #undef __FUNC__
1307b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1308acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1309acdf5bf4SSatish Balay {
1310acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1311acdf5bf4SSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1312acdf5bf4SSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1313d9d09a02SSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1314d9d09a02SSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1315acdf5bf4SSatish Balay 
1316d64ed03dSBarry Smith   PetscFunctionBegin;
1317a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1318acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1319acdf5bf4SSatish Balay 
1320acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1321acdf5bf4SSatish Balay     /*
1322acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1323acdf5bf4SSatish Balay     */
1324acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1325bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1326bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1327acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1328acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1329acdf5bf4SSatish Balay     }
1330549d3d68SSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1331acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1332acdf5bf4SSatish Balay   }
1333acdf5bf4SSatish Balay 
1334a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1335d9d09a02SSatish Balay   lrow = row - brstart;
1336acdf5bf4SSatish Balay 
1337acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1338acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1339acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1340f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1341f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1342acdf5bf4SSatish Balay   nztot = nzA + nzB;
1343acdf5bf4SSatish Balay 
1344acdf5bf4SSatish Balay   cmap  = mat->garray;
1345acdf5bf4SSatish Balay   if (v  || idx) {
1346acdf5bf4SSatish Balay     if (nztot) {
1347acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1348acdf5bf4SSatish Balay       int imark = -1;
1349acdf5bf4SSatish Balay       if (v) {
1350acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1351acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1352d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1353acdf5bf4SSatish Balay           else break;
1354acdf5bf4SSatish Balay         }
1355acdf5bf4SSatish Balay         imark = i;
1356acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1357acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1358acdf5bf4SSatish Balay       }
1359acdf5bf4SSatish Balay       if (idx) {
1360acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1361acdf5bf4SSatish Balay         if (imark > -1) {
1362acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1363bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1364acdf5bf4SSatish Balay           }
1365acdf5bf4SSatish Balay         } else {
1366acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1367d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1368d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1369acdf5bf4SSatish Balay             else break;
1370acdf5bf4SSatish Balay           }
1371acdf5bf4SSatish Balay           imark = i;
1372acdf5bf4SSatish Balay         }
1373d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1374d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1375acdf5bf4SSatish Balay       }
1376d64ed03dSBarry Smith     } else {
1377d212a18eSSatish Balay       if (idx) *idx = 0;
1378d212a18eSSatish Balay       if (v)   *v   = 0;
1379d212a18eSSatish Balay     }
1380acdf5bf4SSatish Balay   }
1381acdf5bf4SSatish Balay   *nz = nztot;
1382f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1383f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
13843a40ed3dSBarry Smith   PetscFunctionReturn(0);
1385acdf5bf4SSatish Balay }
1386acdf5bf4SSatish Balay 
13875615d1e5SSatish Balay #undef __FUNC__
1388b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1389acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1390acdf5bf4SSatish Balay {
1391acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1392d64ed03dSBarry Smith 
1393d64ed03dSBarry Smith   PetscFunctionBegin;
1394acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1395a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1396acdf5bf4SSatish Balay   }
1397acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13983a40ed3dSBarry Smith   PetscFunctionReturn(0);
1399acdf5bf4SSatish Balay }
1400acdf5bf4SSatish Balay 
14015615d1e5SSatish Balay #undef __FUNC__
1402b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1403ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14045a838052SSatish Balay {
14055a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1406d64ed03dSBarry Smith 
1407d64ed03dSBarry Smith   PetscFunctionBegin;
14085a838052SSatish Balay   *bs = baij->bs;
14093a40ed3dSBarry Smith   PetscFunctionReturn(0);
14105a838052SSatish Balay }
14115a838052SSatish Balay 
14125615d1e5SSatish Balay #undef __FUNC__
1413b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1414ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
141558667388SSatish Balay {
141658667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
141758667388SSatish Balay   int         ierr;
1418d64ed03dSBarry Smith 
1419d64ed03dSBarry Smith   PetscFunctionBegin;
142058667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
142158667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14223a40ed3dSBarry Smith   PetscFunctionReturn(0);
142358667388SSatish Balay }
14240ac07820SSatish Balay 
14255615d1e5SSatish Balay #undef __FUNC__
1426b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1427ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14280ac07820SSatish Balay {
14294e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14304e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14317d57db60SLois Curfman McInnes   int         ierr;
1432329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14330ac07820SSatish Balay 
1434d64ed03dSBarry Smith   PetscFunctionBegin;
14354e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14364e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14370e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1438de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14394e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14400e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1441de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14420ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14434e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14444e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14454e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14464e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14474e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14480ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1449f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14504e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14514e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14524e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14534e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14544e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14550ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1456f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14574e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14584e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14594e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14604e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14614e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1462d41123aaSBarry Smith   } else {
1463d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
14640ac07820SSatish Balay   }
14655a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14665a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14675a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14685a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14694e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14704e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14714e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14723a40ed3dSBarry Smith   PetscFunctionReturn(0);
14730ac07820SSatish Balay }
14740ac07820SSatish Balay 
14755615d1e5SSatish Balay #undef __FUNC__
1476b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1477ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
147858667388SSatish Balay {
147958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
148098305bb5SBarry Smith   int         ierr;
148158667388SSatish Balay 
1482d64ed03dSBarry Smith   PetscFunctionBegin;
148358667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
148458667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14856da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1486c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
14874787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
14887c922b88SBarry Smith       op == MAT_KEEP_ZEROED_ROWS ||
14894787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
149098305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
149198305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1492b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
14937c922b88SBarry Smith         a->roworiented = PETSC_TRUE;
149498305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
149598305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1496b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14976da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
149858667388SSatish Balay              op == MAT_SYMMETRIC ||
149958667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1500b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
150198305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
150258667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
150398305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
15047c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
150598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
150698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
15072b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
15087c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
1509d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1510d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1511133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
15127c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
1513d64ed03dSBarry Smith   } else {
1514d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1515d64ed03dSBarry Smith   }
15163a40ed3dSBarry Smith   PetscFunctionReturn(0);
151758667388SSatish Balay }
151858667388SSatish Balay 
15195615d1e5SSatish Balay #undef __FUNC__
1520b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1521ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15220ac07820SSatish Balay {
15230ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15240ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15250ac07820SSatish Balay   Mat         B;
152640011551SBarry Smith   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15270ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15283eda8832SBarry Smith   MatScalar   *a;
15290ac07820SSatish Balay 
1530d64ed03dSBarry Smith   PetscFunctionBegin;
15317c922b88SBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15327c922b88SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15330ac07820SSatish Balay 
15340ac07820SSatish Balay   /* copy over the A part */
15350ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15360ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15370ac07820SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
15380ac07820SSatish Balay 
15390ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15400ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15410ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15420ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15430ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15440ac07820SSatish Balay       for (k=0; k<bs; k++) {
1545*93fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15460ac07820SSatish Balay         col++; a += bs;
15470ac07820SSatish Balay       }
15480ac07820SSatish Balay     }
15490ac07820SSatish Balay   }
15500ac07820SSatish Balay   /* copy over the B part */
15510ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15520ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15530ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15540ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15550ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15560ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15570ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15580ac07820SSatish Balay       for (k=0; k<bs; k++) {
1559*93fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15600ac07820SSatish Balay         col++; a += bs;
15610ac07820SSatish Balay       }
15620ac07820SSatish Balay     }
15630ac07820SSatish Balay   }
1564606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15650ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15660ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15670ac07820SSatish Balay 
15687c922b88SBarry Smith   if (matout) {
15690ac07820SSatish Balay     *matout = B;
15700ac07820SSatish Balay   } else {
1571f830108cSBarry Smith     PetscOps *Abops;
1572cc2dc46cSBarry Smith     MatOps   Aops;
1573f830108cSBarry Smith 
15740ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1575606d414cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
15760ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
15770ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1578aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
15790f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1580b1fc9764SSatish Balay #else
1581606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1582b1fc9764SSatish Balay #endif
1583606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1584606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1585606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1586606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1587f830108cSBarry Smith 
1588f830108cSBarry Smith     /*
1589f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1590f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1591f830108cSBarry Smith       else from C.
1592f830108cSBarry Smith     */
1593f830108cSBarry Smith     Abops   = A->bops;
1594f830108cSBarry Smith     Aops    = A->ops;
1595549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1596f830108cSBarry Smith     A->bops = Abops;
1597f830108cSBarry Smith     A->ops  = Aops;
1598f830108cSBarry Smith 
15990ac07820SSatish Balay     PetscHeaderDestroy(B);
16000ac07820SSatish Balay   }
16013a40ed3dSBarry Smith   PetscFunctionReturn(0);
16020ac07820SSatish Balay }
16030e95ebc0SSatish Balay 
16045615d1e5SSatish Balay #undef __FUNC__
1605b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
160636c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16070e95ebc0SSatish Balay {
160836c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
160936c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16100e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16110e95ebc0SSatish Balay 
1612d64ed03dSBarry Smith   PetscFunctionBegin;
161336c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
161436c4a09eSSatish Balay   if (rr) {
161536c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
161636c4a09eSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
161736c4a09eSSatish Balay     /* Overlap communication with computation. */
161836c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
161936c4a09eSSatish Balay   }
16200e95ebc0SSatish Balay   if (ll) {
16210e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
162236c4a09eSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1623a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16240e95ebc0SSatish Balay   }
162536c4a09eSSatish Balay   /* scale  the diagonal block */
162636c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
162736c4a09eSSatish Balay 
162836c4a09eSSatish Balay   if (rr) {
162936c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
163036c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1631a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
163236c4a09eSSatish Balay   }
163336c4a09eSSatish Balay 
16343a40ed3dSBarry Smith   PetscFunctionReturn(0);
16350e95ebc0SSatish Balay }
16360e95ebc0SSatish Balay 
16375615d1e5SSatish Balay #undef __FUNC__
1638b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1639ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16400ac07820SSatish Balay {
16410ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16420ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1643a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16440ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16450ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1646a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16470ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16480ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16490ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16500ac07820SSatish Balay   IS             istmp;
16510ac07820SSatish Balay 
1652d64ed03dSBarry Smith   PetscFunctionBegin;
16530ac07820SSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
16540ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16550ac07820SSatish Balay 
16560ac07820SSatish Balay   /*  first count number of contributors to each processor */
16570ac07820SSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1658549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1659549d3d68SSatish Balay   procs  = nprocs + size;
16600ac07820SSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
16610ac07820SSatish Balay   for (i=0; i<N; i++) {
16620ac07820SSatish Balay     idx   = rows[i];
16630ac07820SSatish Balay     found = 0;
16640ac07820SSatish Balay     for (j=0; j<size; j++) {
16650ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16660ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16670ac07820SSatish Balay       }
16680ac07820SSatish Balay     }
1669a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16700ac07820SSatish Balay   }
16710ac07820SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
16720ac07820SSatish Balay 
16730ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16746831982aSBarry Smith   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
16756831982aSBarry Smith   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
16760ac07820SSatish Balay   nmax   = work[rank];
16776831982aSBarry Smith   nrecvs = work[size+rank];
1678606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
16790ac07820SSatish Balay 
16800ac07820SSatish Balay   /* post receives:   */
1681d64ed03dSBarry Smith   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1682d64ed03dSBarry Smith   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16830ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1684ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16850ac07820SSatish Balay   }
16860ac07820SSatish Balay 
16870ac07820SSatish Balay   /* do sends:
16880ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16890ac07820SSatish Balay      the ith processor
16900ac07820SSatish Balay   */
16910ac07820SSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1692ca161407SBarry Smith   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16930ac07820SSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
16940ac07820SSatish Balay   starts[0]  = 0;
16950ac07820SSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
16960ac07820SSatish Balay   for (i=0; i<N; i++) {
16970ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16980ac07820SSatish Balay   }
16996831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17000ac07820SSatish Balay 
17010ac07820SSatish Balay   starts[0] = 0;
17020ac07820SSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17030ac07820SSatish Balay   count = 0;
17040ac07820SSatish Balay   for (i=0; i<size; i++) {
17050ac07820SSatish Balay     if (procs[i]) {
1706ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17070ac07820SSatish Balay     }
17080ac07820SSatish Balay   }
1709606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17100ac07820SSatish Balay 
17110ac07820SSatish Balay   base = owners[rank]*bs;
17120ac07820SSatish Balay 
17130ac07820SSatish Balay   /*  wait on receives */
17140ac07820SSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
17150ac07820SSatish Balay   source = lens + nrecvs;
17160ac07820SSatish Balay   count  = nrecvs; slen = 0;
17170ac07820SSatish Balay   while (count) {
1718ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17190ac07820SSatish Balay     /* unpack receives into our local space */
1720ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17210ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17220ac07820SSatish Balay     lens[imdex]    = n;
17230ac07820SSatish Balay     slen          += n;
17240ac07820SSatish Balay     count--;
17250ac07820SSatish Balay   }
1726606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17270ac07820SSatish Balay 
17280ac07820SSatish Balay   /* move the data into the send scatter */
17290ac07820SSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
17300ac07820SSatish Balay   count = 0;
17310ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17320ac07820SSatish Balay     values = rvalues + i*nmax;
17330ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17340ac07820SSatish Balay       lrows[count++] = values[j] - base;
17350ac07820SSatish Balay     }
17360ac07820SSatish Balay   }
1737606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1738606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1739606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1740606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17410ac07820SSatish Balay 
17420ac07820SSatish Balay   /* actually zap the local rows */
1743029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17440ac07820SSatish Balay   PLogObjectParent(A,istmp);
1745a07cd24cSSatish Balay 
174672dacd9aSBarry Smith   /*
174772dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
174872dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
174972dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
175072dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
175172dacd9aSBarry Smith 
175272dacd9aSBarry Smith        Contributed by: Mathew Knepley
175372dacd9aSBarry Smith   */
17549c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17553eda8832SBarry Smith   ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
17569c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17573eda8832SBarry Smith     ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17589c957beeSSatish Balay   } else if (diag) {
17593eda8832SBarry Smith     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1760fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1761fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1762fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17636525c446SSatish Balay     }
1764a07cd24cSSatish Balay     for (i = 0; i < slen; i++) {
1765a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17663eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1767a07cd24cSSatish Balay     }
1768a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1769a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17709c957beeSSatish Balay   } else {
17713eda8832SBarry Smith     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1772a07cd24cSSatish Balay   }
17739c957beeSSatish Balay 
17749c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1775606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1776a07cd24cSSatish Balay 
17770ac07820SSatish Balay   /* wait on sends */
17780ac07820SSatish Balay   if (nsends) {
1779d64ed03dSBarry Smith     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1780ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1781606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
17820ac07820SSatish Balay   }
1783606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1784606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
17850ac07820SSatish Balay 
17863a40ed3dSBarry Smith   PetscFunctionReturn(0);
17870ac07820SSatish Balay }
178872dacd9aSBarry Smith 
17895615d1e5SSatish Balay #undef __FUNC__
1790b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1791ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1792ba4ca20aSSatish Balay {
1793ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
179425fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1795133cdb44SSatish Balay   static int  called = 0;
17963a40ed3dSBarry Smith   int         ierr;
1797ba4ca20aSSatish Balay 
1798d64ed03dSBarry Smith   PetscFunctionBegin;
17993a40ed3dSBarry Smith   if (!a->rank) {
18003a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
180125fdafccSSatish Balay   }
180225fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1803d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1804d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1806ba4ca20aSSatish Balay }
18070ac07820SSatish Balay 
18085615d1e5SSatish Balay #undef __FUNC__
1809b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1810ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1811bb5a7306SBarry Smith {
1812bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1813bb5a7306SBarry Smith   int         ierr;
1814d64ed03dSBarry Smith 
1815d64ed03dSBarry Smith   PetscFunctionBegin;
1816bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1818bb5a7306SBarry Smith }
1819bb5a7306SBarry Smith 
18202e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18210ac07820SSatish Balay 
18227fc3c18eSBarry Smith #undef __FUNC__
1823b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
18247fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18257fc3c18eSBarry Smith {
18267fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18277fc3c18eSBarry Smith   Mat         a,b,c,d;
18287fc3c18eSBarry Smith   PetscTruth  flg;
18297fc3c18eSBarry Smith   int         ierr;
18307fc3c18eSBarry Smith 
18317fc3c18eSBarry Smith   PetscFunctionBegin;
18327fc3c18eSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
18337fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18347fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18357fc3c18eSBarry Smith 
18367fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18377fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18387fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18397fc3c18eSBarry Smith   }
18407fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18417fc3c18eSBarry Smith   PetscFunctionReturn(0);
18427fc3c18eSBarry Smith }
18437fc3c18eSBarry Smith 
184479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1845cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1846cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1847cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1848cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1849cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1850cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
18517c922b88SBarry Smith   MatMultTranspose_MPIBAIJ,
18527c922b88SBarry Smith   MatMultTransposeAdd_MPIBAIJ,
1853cc2dc46cSBarry Smith   0,
1854cc2dc46cSBarry Smith   0,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   0,
1859cc2dc46cSBarry Smith   0,
1860cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1861cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
18627fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1863cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1864cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1865cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1866cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1867cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1868cc2dc46cSBarry Smith   0,
1869cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1870cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1871cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1872cc2dc46cSBarry Smith   0,
1873cc2dc46cSBarry Smith   0,
1874cc2dc46cSBarry Smith   0,
1875cc2dc46cSBarry Smith   0,
1876cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1877cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1878cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1879cc2dc46cSBarry Smith   0,
1880cc2dc46cSBarry Smith   0,
1881cc2dc46cSBarry Smith   0,
1882cc2dc46cSBarry Smith   0,
18832e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1884cc2dc46cSBarry Smith   0,
1885cc2dc46cSBarry Smith   0,
1886cc2dc46cSBarry Smith   0,
1887cc2dc46cSBarry Smith   0,
1888cc2dc46cSBarry Smith   0,
1889cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1890cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1891cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1892cc2dc46cSBarry Smith   0,
1893cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1894cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1895cc2dc46cSBarry Smith   0,
1896cc2dc46cSBarry Smith   0,
1897cc2dc46cSBarry Smith   0,
1898cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1899cc2dc46cSBarry Smith   0,
1900cc2dc46cSBarry Smith   0,
1901cc2dc46cSBarry Smith   0,
1902cc2dc46cSBarry Smith   0,
1903cc2dc46cSBarry Smith   0,
1904cc2dc46cSBarry Smith   0,
1905cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1906cc2dc46cSBarry Smith   0,
1907cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1908cc2dc46cSBarry Smith   0,
1909cc2dc46cSBarry Smith   0,
1910cc2dc46cSBarry Smith   0,
1911cc2dc46cSBarry Smith   MatGetMaps_Petsc};
191279bdfe76SSatish Balay 
19135ef9f2a5SBarry Smith 
1914e18c124aSSatish Balay EXTERN_C_BEGIN
19155ef9f2a5SBarry Smith #undef __FUNC__
1916b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
19175ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19185ef9f2a5SBarry Smith {
19195ef9f2a5SBarry Smith   PetscFunctionBegin;
19205ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19215ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19225ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19235ef9f2a5SBarry Smith }
1924e18c124aSSatish Balay EXTERN_C_END
192579bdfe76SSatish Balay 
19265615d1e5SSatish Balay #undef __FUNC__
1927b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
192879bdfe76SSatish Balay /*@C
192979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
193079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
193179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
193279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
193379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
193479bdfe76SSatish Balay 
1935db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1936db81eaa0SLois Curfman McInnes 
193779bdfe76SSatish Balay    Input Parameters:
1938db81eaa0SLois Curfman McInnes +  comm - MPI communicator
193979bdfe76SSatish Balay .  bs   - size of blockk
194079bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
194192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
194292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
194392e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
194492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
194592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1946be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1947be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
194879bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
194979bdfe76SSatish Balay            submatrix  (same for all local rows)
19507fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
195192e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1952db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
195392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
195479bdfe76SSatish Balay            submatrix (same for all local rows).
19557fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
195692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
195792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
195879bdfe76SSatish Balay 
195979bdfe76SSatish Balay    Output Parameter:
196079bdfe76SSatish Balay .  A - the matrix
196179bdfe76SSatish Balay 
1962db81eaa0SLois Curfman McInnes    Options Database Keys:
1963db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1964db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1965db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1966494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1967494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19683ffaccefSLois Curfman McInnes 
1969b259b22eSLois Curfman McInnes    Notes:
197079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
197179bdfe76SSatish Balay    (possibly both).
197279bdfe76SSatish Balay 
1973be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1974be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1975be79a94dSBarry Smith 
197679bdfe76SSatish Balay    Storage Information:
197779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
197879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
197979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
198079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
198179bdfe76SSatish Balay 
198279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
198379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
198479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
198579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
198679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
198779bdfe76SSatish Balay 
198879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
198979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
199079bdfe76SSatish Balay 
1991db81eaa0SLois Curfman McInnes .vb
1992db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1993db81eaa0SLois Curfman McInnes           -------------------
1994db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1995db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1996db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1997db81eaa0SLois Curfman McInnes           -------------------
1998db81eaa0SLois Curfman McInnes .ve
199979bdfe76SSatish Balay 
200079bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
200179bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
200279bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
200357b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
200479bdfe76SSatish Balay 
2005d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2006d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
200779bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
200892e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
200992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
20106da5968aSLois Curfman McInnes    matrices.
201179bdfe76SSatish Balay 
2012027ccd11SLois Curfman McInnes    Level: intermediate
2013027ccd11SLois Curfman McInnes 
201492e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
201579bdfe76SSatish Balay 
20163eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
201779bdfe76SSatish Balay @*/
2018329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
201979bdfe76SSatish Balay {
202079bdfe76SSatish Balay   Mat          B;
202179bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2022f1af5d2fSBarry Smith   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2023f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
202479bdfe76SSatish Balay 
2025d64ed03dSBarry Smith   PetscFunctionBegin;
2026962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2027962fb4a0SBarry Smith 
2028a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
202936c4a09eSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
203036c4a09eSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
20314fdb0a08SBarry Smith   if (d_nnz) {
203236c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20334fdb0a08SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
20344fdb0a08SBarry Smith     }
20354fdb0a08SBarry Smith   }
20364fdb0a08SBarry Smith   if (o_nnz) {
203736c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20384fdb0a08SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
20394fdb0a08SBarry Smith     }
20404fdb0a08SBarry Smith   }
20413914022bSBarry Smith 
2042d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2044494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2045494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20463914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20473914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20483914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
20493a40ed3dSBarry Smith     PetscFunctionReturn(0);
20503914022bSBarry Smith   }
20513914022bSBarry Smith 
205279bdfe76SSatish Balay   *A = 0;
20533f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
205479bdfe76SSatish Balay   PLogObjectCreate(B);
205579bdfe76SSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2056549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2057549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
20584c50302cSBarry Smith 
2059e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2060e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
206190f02eecSBarry Smith   B->mapping    = 0;
206279bdfe76SSatish Balay   B->factor     = 0;
206379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
206479bdfe76SSatish Balay 
2065e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2066d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2067d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
206879bdfe76SSatish Balay 
20697c922b88SBarry Smith   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2070a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2071d64ed03dSBarry Smith   }
2072a8c6a408SBarry Smith   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2073a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2074a8c6a408SBarry Smith   }
2075a8c6a408SBarry Smith   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2076a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2077a8c6a408SBarry Smith   }
2078cee3aa6bSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2079cee3aa6bSSatish Balay   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
208079bdfe76SSatish Balay 
208179bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
208279bdfe76SSatish Balay     work[0] = m; work[1] = n;
208379bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2084ca161407SBarry Smith     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
208579bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
208679bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
208779bdfe76SSatish Balay   }
208879bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
208979bdfe76SSatish Balay     Mbs = M/bs;
2090a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
209179bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
209279bdfe76SSatish Balay     m   = mbs*bs;
209379bdfe76SSatish Balay   }
209479bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
209579bdfe76SSatish Balay     Nbs = N/bs;
2096a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
209779bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
209879bdfe76SSatish Balay     n   = nbs*bs;
209979bdfe76SSatish Balay   }
2100a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2101a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2102a8c6a408SBarry Smith   }
210379bdfe76SSatish Balay 
210479bdfe76SSatish Balay   b->m = m; B->m = m;
210579bdfe76SSatish Balay   b->n = n; B->n = n;
210679bdfe76SSatish Balay   b->N = N; B->N = N;
210779bdfe76SSatish Balay   b->M = M; B->M = M;
210879bdfe76SSatish Balay   b->bs  = bs;
210979bdfe76SSatish Balay   b->bs2 = bs*bs;
211079bdfe76SSatish Balay   b->mbs = mbs;
211179bdfe76SSatish Balay   b->nbs = nbs;
211279bdfe76SSatish Balay   b->Mbs = Mbs;
211379bdfe76SSatish Balay   b->Nbs = Nbs;
211479bdfe76SSatish Balay 
2115c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2116c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2117488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2118488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2119c7fcc2eaSBarry Smith 
212079bdfe76SSatish Balay   /* build local table of row and column ownerships */
2121ff2fd236SBarry Smith   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2122ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21230ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2124ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2125ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
212679bdfe76SSatish Balay   b->rowners[0]    = 0;
212779bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
212879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
212979bdfe76SSatish Balay   }
2130ff2fd236SBarry Smith   for (i=0; i<=b->size; i++) {
2131ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2132ff2fd236SBarry Smith   }
213379bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
213479bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
21354fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
21364fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
21374fa0d573SSatish Balay 
2138ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
213979bdfe76SSatish Balay   b->cowners[0] = 0;
214079bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
214179bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
214279bdfe76SSatish Balay   }
214379bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
214479bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21454fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21464fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
214779bdfe76SSatish Balay 
2148a07cd24cSSatish Balay 
214979bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2150029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
215179bdfe76SSatish Balay   PLogObjectParent(B,b->A);
215279bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2153029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
215479bdfe76SSatish Balay   PLogObjectParent(B,b->B);
215579bdfe76SSatish Balay 
215679bdfe76SSatish Balay   /* build cache for off array entries formed */
21578798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
21588798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
21597c922b88SBarry Smith   b->donotstash  = PETSC_FALSE;
21607c922b88SBarry Smith   b->colmap      = PETSC_NULL;
21617c922b88SBarry Smith   b->garray      = PETSC_NULL;
21627c922b88SBarry Smith   b->roworiented = PETSC_TRUE;
216379bdfe76SSatish Balay 
216430793edcSSatish Balay   /* stuff used in block assembly */
216530793edcSSatish Balay   b->barray       = 0;
216630793edcSSatish Balay 
216779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
216879bdfe76SSatish Balay   b->lvec         = 0;
216979bdfe76SSatish Balay   b->Mvctx        = 0;
217079bdfe76SSatish Balay 
217179bdfe76SSatish Balay   /* stuff for MatGetRow() */
217279bdfe76SSatish Balay   b->rowindices   = 0;
217379bdfe76SSatish Balay   b->rowvalues    = 0;
217479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
217579bdfe76SSatish Balay 
2176a07cd24cSSatish Balay   /* hash table stuff */
2177a07cd24cSSatish Balay   b->ht           = 0;
2178187ce0cbSSatish Balay   b->hd           = 0;
21790bdbc534SSatish Balay   b->ht_size      = 0;
21807c922b88SBarry Smith   b->ht_flag      = PETSC_FALSE;
218125fdafccSSatish Balay   b->ht_fact      = 0;
2182187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2183187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2184a07cd24cSSatish Balay 
218579bdfe76SSatish Balay   *A = B;
2186133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2187133cdb44SSatish Balay   if (flg) {
2188133cdb44SSatish Balay     double fact = 1.39;
2189133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2190f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2191133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2192133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2193133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2194133cdb44SSatish Balay   }
2195f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
21967fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
21977fc3c18eSBarry Smith                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2198f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
21997fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
22007fc3c18eSBarry Smith                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2201f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
22025ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
22035ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
22043a40ed3dSBarry Smith   PetscFunctionReturn(0);
220579bdfe76SSatish Balay }
2206026e39d0SSatish Balay 
22075615d1e5SSatish Balay #undef __FUNC__
2208b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
22092e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
22100ac07820SSatish Balay {
22110ac07820SSatish Balay   Mat         mat;
22120ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2213f1af5d2fSBarry Smith   int         ierr,len=0;
2214f1af5d2fSBarry Smith   PetscTruth  flg;
22150ac07820SSatish Balay 
2216d64ed03dSBarry Smith   PetscFunctionBegin;
22170ac07820SSatish Balay   *newmat       = 0;
22183f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
22190ac07820SSatish Balay   PLogObjectCreate(mat);
22200ac07820SSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2221549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2222e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIBAIJ;
2223e1311b90SBarry Smith   mat->ops->view    = MatView_MPIBAIJ;
22240ac07820SSatish Balay   mat->factor       = matin->factor;
22250ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2226aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
22270ac07820SSatish Balay 
22280ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
22290ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
22300ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
22310ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
22320ac07820SSatish Balay 
22330ac07820SSatish Balay   a->bs  = oldmat->bs;
22340ac07820SSatish Balay   a->bs2 = oldmat->bs2;
22350ac07820SSatish Balay   a->mbs = oldmat->mbs;
22360ac07820SSatish Balay   a->nbs = oldmat->nbs;
22370ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
22380ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
22390ac07820SSatish Balay 
22400ac07820SSatish Balay   a->rstart       = oldmat->rstart;
22410ac07820SSatish Balay   a->rend         = oldmat->rend;
22420ac07820SSatish Balay   a->cstart       = oldmat->cstart;
22430ac07820SSatish Balay   a->cend         = oldmat->cend;
22440ac07820SSatish Balay   a->size         = oldmat->size;
22450ac07820SSatish Balay   a->rank         = oldmat->rank;
2246aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2247aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2248aef5e8e0SSatish Balay   a->rowindices   = 0;
22490ac07820SSatish Balay   a->rowvalues    = 0;
22500ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
225130793edcSSatish Balay   a->barray       = 0;
22523123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
22533123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
22543123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
22553123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
22560ac07820SSatish Balay 
2257133cdb44SSatish Balay   /* hash table stuff */
2258133cdb44SSatish Balay   a->ht           = 0;
2259133cdb44SSatish Balay   a->hd           = 0;
2260133cdb44SSatish Balay   a->ht_size      = 0;
2261133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
226225fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2263133cdb44SSatish Balay   a->ht_total_ct  = 0;
2264133cdb44SSatish Balay   a->ht_insert_ct = 0;
2265133cdb44SSatish Balay 
2266133cdb44SSatish Balay 
2267ff2fd236SBarry Smith   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2268ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22690ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2270ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2271549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
22728798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
22738798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
22740ac07820SSatish Balay   if (oldmat->colmap) {
2275aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
22760f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
227748e59246SSatish Balay #else
22780ac07820SSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22790ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2280549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
228148e59246SSatish Balay #endif
22820ac07820SSatish Balay   } else a->colmap = 0;
22830ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
22840ac07820SSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
22850ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2286549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
22870ac07820SSatish Balay   } else a->garray = 0;
22880ac07820SSatish Balay 
22890ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
22900ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22910ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2292e18c124aSSatish Balay 
22930ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22942e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
22950ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22962e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
22970ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22980ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2299e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
23000ac07820SSatish Balay   if (flg) {
23010ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
23020ac07820SSatish Balay   }
23030ac07820SSatish Balay   *newmat = mat;
23043a40ed3dSBarry Smith   PetscFunctionReturn(0);
23050ac07820SSatish Balay }
230657b952d6SSatish Balay 
230757b952d6SSatish Balay #include "sys.h"
230857b952d6SSatish Balay 
23095615d1e5SSatish Balay #undef __FUNC__
2310b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
231157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
231257b952d6SSatish Balay {
231357b952d6SSatish Balay   Mat          A;
231457b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
231557b952d6SSatish Balay   Scalar       *vals,*buf;
231657b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
231757b952d6SSatish Balay   MPI_Status   status;
2318cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
231957b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2320f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
232157b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
232257b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
232357b952d6SSatish Balay 
2324d64ed03dSBarry Smith   PetscFunctionBegin;
2325f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
232657b952d6SSatish Balay 
2327d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2328d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
232957b952d6SSatish Balay   if (!rank) {
233057b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2331e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2332a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2333d64ed03dSBarry Smith     if (header[3] < 0) {
2334a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2335d64ed03dSBarry Smith     }
23366c5fab8fSBarry Smith   }
2337d64ed03dSBarry Smith 
2338ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
233957b952d6SSatish Balay   M = header[1]; N = header[2];
234057b952d6SSatish Balay 
2341a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
234257b952d6SSatish Balay 
234357b952d6SSatish Balay   /*
234457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
234557b952d6SSatish Balay      divisible by the blocksize
234657b952d6SSatish Balay   */
234757b952d6SSatish Balay   Mbs        = M/bs;
234857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
234957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
235057b952d6SSatish Balay   else                  Mbs++;
235157b952d6SSatish Balay   if (extra_rows &&!rank) {
2352b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
235357b952d6SSatish Balay   }
2354537820f0SBarry Smith 
235557b952d6SSatish Balay   /* determine ownership of all rows */
235657b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
235757b952d6SSatish Balay   m   = mbs * bs;
2358cee3aa6bSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2359cee3aa6bSSatish Balay   browners = rowners + size + 1;
2360ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
236157b952d6SSatish Balay   rowners[0] = 0;
2362cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2363cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
236457b952d6SSatish Balay   rstart = rowners[rank];
236557b952d6SSatish Balay   rend   = rowners[rank+1];
236657b952d6SSatish Balay 
236757b952d6SSatish Balay   /* distribute row lengths to all processors */
236857b952d6SSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
236957b952d6SSatish Balay   if (!rank) {
237057b952d6SSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2371e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
237257b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
237357b952d6SSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2374cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2375ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2376606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2377d64ed03dSBarry Smith   } else {
2378ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
237957b952d6SSatish Balay   }
238057b952d6SSatish Balay 
238157b952d6SSatish Balay   if (!rank) {
238257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
238357b952d6SSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2384549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
238557b952d6SSatish Balay     for (i=0; i<size; i++) {
238657b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
238757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
238857b952d6SSatish Balay       }
238957b952d6SSatish Balay     }
2390606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
239157b952d6SSatish Balay 
239257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
239357b952d6SSatish Balay     maxnz = 0;
239457b952d6SSatish Balay     for (i=0; i<size; i++) {
239557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
239657b952d6SSatish Balay     }
239757b952d6SSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
239857b952d6SSatish Balay 
239957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
240057b952d6SSatish Balay     nz = procsnz[0];
240157b952d6SSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
240257b952d6SSatish Balay     mycols = ibuf;
2403cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2404e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2405cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2406cee3aa6bSSatish Balay 
240757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
240857b952d6SSatish Balay     for (i=1; i<size-1; i++) {
240957b952d6SSatish Balay       nz   = procsnz[i];
2410e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2411ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
241257b952d6SSatish Balay     }
241357b952d6SSatish Balay     /* read in the stuff for the last proc */
241457b952d6SSatish Balay     if (size != 1) {
241557b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2416e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
241757b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2418ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
241957b952d6SSatish Balay     }
2420606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2421d64ed03dSBarry Smith   } else {
242257b952d6SSatish Balay     /* determine buffer space needed for message */
242357b952d6SSatish Balay     nz = 0;
242457b952d6SSatish Balay     for (i=0; i<m; i++) {
242557b952d6SSatish Balay       nz += locrowlens[i];
242657b952d6SSatish Balay     }
242757b952d6SSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
242857b952d6SSatish Balay     mycols = ibuf;
242957b952d6SSatish Balay     /* receive message of column indices*/
2430ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2431ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2432a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
243357b952d6SSatish Balay   }
243457b952d6SSatish Balay 
243557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2436cee3aa6bSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2437cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
243857b952d6SSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2439549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
244057b952d6SSatish Balay   masked1 = mask    + Mbs;
244157b952d6SSatish Balay   masked2 = masked1 + Mbs;
244257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
244357b952d6SSatish Balay   for (i=0; i<mbs; i++) {
244457b952d6SSatish Balay     dcount  = 0;
244557b952d6SSatish Balay     odcount = 0;
244657b952d6SSatish Balay     for (j=0; j<bs; j++) {
244757b952d6SSatish Balay       kmax = locrowlens[rowcount];
244857b952d6SSatish Balay       for (k=0; k<kmax; k++) {
244957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
245057b952d6SSatish Balay         if (!mask[tmp]) {
245157b952d6SSatish Balay           mask[tmp] = 1;
245257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
245357b952d6SSatish Balay           else masked1[dcount++] = tmp;
245457b952d6SSatish Balay         }
245557b952d6SSatish Balay       }
245657b952d6SSatish Balay       rowcount++;
245757b952d6SSatish Balay     }
2458cee3aa6bSSatish Balay 
245957b952d6SSatish Balay     dlens[i]  = dcount;
246057b952d6SSatish Balay     odlens[i] = odcount;
2461cee3aa6bSSatish Balay 
246257b952d6SSatish Balay     /* zero out the mask elements we set */
246357b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
246457b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
246557b952d6SSatish Balay   }
2466cee3aa6bSSatish Balay 
246757b952d6SSatish Balay   /* create our matrix */
2468549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
246957b952d6SSatish Balay   A = *newmat;
24706d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
247157b952d6SSatish Balay 
247257b952d6SSatish Balay   if (!rank) {
247357b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
247457b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
247557b952d6SSatish Balay     nz = procsnz[0];
247657b952d6SSatish Balay     vals = buf;
2477cee3aa6bSSatish Balay     mycols = ibuf;
2478cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2479e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2480cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2481537820f0SBarry Smith 
248257b952d6SSatish Balay     /* insert into matrix */
248357b952d6SSatish Balay     jj      = rstart*bs;
248457b952d6SSatish Balay     for (i=0; i<m; i++) {
24853eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
248657b952d6SSatish Balay       mycols += locrowlens[i];
248757b952d6SSatish Balay       vals   += locrowlens[i];
248857b952d6SSatish Balay       jj++;
248957b952d6SSatish Balay     }
249057b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
249157b952d6SSatish Balay     for (i=1; i<size-1; i++) {
249257b952d6SSatish Balay       nz   = procsnz[i];
249357b952d6SSatish Balay       vals = buf;
2494e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2495ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
249657b952d6SSatish Balay     }
249757b952d6SSatish Balay     /* the last proc */
249857b952d6SSatish Balay     if (size != 1){
249957b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2500cee3aa6bSSatish Balay       vals = buf;
2501e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
250257b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2503ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
250457b952d6SSatish Balay     }
2505606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2506d64ed03dSBarry Smith   } else {
250757b952d6SSatish Balay     /* receive numeric values */
250857b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
250957b952d6SSatish Balay 
251057b952d6SSatish Balay     /* receive message of values*/
251157b952d6SSatish Balay     vals   = buf;
2512cee3aa6bSSatish Balay     mycols = ibuf;
2513ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2514ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2515a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
251657b952d6SSatish Balay 
251757b952d6SSatish Balay     /* insert into matrix */
251857b952d6SSatish Balay     jj      = rstart*bs;
2519cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
25203eda8832SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
252157b952d6SSatish Balay       mycols += locrowlens[i];
252257b952d6SSatish Balay       vals   += locrowlens[i];
252357b952d6SSatish Balay       jj++;
252457b952d6SSatish Balay     }
252557b952d6SSatish Balay   }
2526606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2527606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2528606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2529606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2530606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2531606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
25326d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25336d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25343a40ed3dSBarry Smith   PetscFunctionReturn(0);
253557b952d6SSatish Balay }
253657b952d6SSatish Balay 
2537133cdb44SSatish Balay #undef __FUNC__
2538b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2539133cdb44SSatish Balay /*@
2540133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2541133cdb44SSatish Balay 
2542133cdb44SSatish Balay    Input Parameters:
2543133cdb44SSatish Balay .  mat  - the matrix
2544133cdb44SSatish Balay .  fact - factor
2545133cdb44SSatish Balay 
2546fee21e36SBarry Smith    Collective on Mat
2547fee21e36SBarry Smith 
25488c890885SBarry Smith    Level: advanced
25498c890885SBarry Smith 
2550133cdb44SSatish Balay   Notes:
2551133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2552133cdb44SSatish Balay 
2553133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2554133cdb44SSatish Balay 
2555133cdb44SSatish Balay .seealso: MatSetOption()
2556133cdb44SSatish Balay @*/
2557329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2558133cdb44SSatish Balay {
255925fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2560133cdb44SSatish Balay 
2561133cdb44SSatish Balay   PetscFunctionBegin;
2562133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
256325fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2564133cdb44SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2565133cdb44SSatish Balay   }
2566133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2567133cdb44SSatish Balay   baij->ht_fact = fact;
2568133cdb44SSatish Balay   PetscFunctionReturn(0);
2569133cdb44SSatish Balay }
2570