xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 54750694bbef7523d49f1dcfdab14d6260091406)
1*54750694SBarry Smith /*$Id: mpibaij.c,v 1.190 2000/04/10 04:25:49 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1693fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
1793fea6afSBarry Smith 
1893fea6afSBarry Smith /*  UGLY, ugly, ugly
1993fea6afSBarry Smith    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
2093fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2193fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2293fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2393fea6afSBarry Smith    into the single precision data structures.
2493fea6afSBarry Smith */
2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2693fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27*54750694SBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28*54750694SBarry Smith extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
2993fea6afSBarry Smith #else
3093fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
3193fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
3293fea6afSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
3393fea6afSBarry Smith #endif
343b2fbd54SBarry Smith 
357fc3c18eSBarry Smith EXTERN_C_BEGIN
367fc3c18eSBarry Smith #undef __FUNC__
37b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
387fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
397fc3c18eSBarry Smith {
407fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
417fc3c18eSBarry Smith   int         ierr;
427fc3c18eSBarry Smith 
437fc3c18eSBarry Smith   PetscFunctionBegin;
447fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
457fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
467fc3c18eSBarry Smith   PetscFunctionReturn(0);
477fc3c18eSBarry Smith }
487fc3c18eSBarry Smith EXTERN_C_END
497fc3c18eSBarry Smith 
507fc3c18eSBarry Smith EXTERN_C_BEGIN
517fc3c18eSBarry Smith #undef __FUNC__
52b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
537fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
547fc3c18eSBarry Smith {
557fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
567fc3c18eSBarry Smith   int         ierr;
577fc3c18eSBarry Smith 
587fc3c18eSBarry Smith   PetscFunctionBegin;
597fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
607fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
617fc3c18eSBarry Smith   PetscFunctionReturn(0);
627fc3c18eSBarry Smith }
637fc3c18eSBarry Smith EXTERN_C_END
647fc3c18eSBarry Smith 
65537820f0SBarry Smith /*
66537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
6757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
6857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
6957b952d6SSatish Balay    length of colmap equals the global matrix length.
7057b952d6SSatish Balay */
715615d1e5SSatish Balay #undef __FUNC__
72b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
7357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
7457b952d6SSatish Balay {
7557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
7657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
77dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
7857b952d6SSatish Balay 
79d64ed03dSBarry Smith   PetscFunctionBegin;
80aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
810f5bd95cSBarry Smith   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
8248e59246SSatish Balay   for (i=0; i<nbs; i++){
830f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
8448e59246SSatish Balay   }
8548e59246SSatish Balay #else
86758f045eSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
8757b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
88549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
89928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9048e59246SSatish Balay #endif
913a40ed3dSBarry Smith   PetscFunctionReturn(0);
9257b952d6SSatish Balay }
9357b952d6SSatish Balay 
9480c1aa95SSatish Balay #define CHUNKSIZE  10
9580c1aa95SSatish Balay 
96f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
9780c1aa95SSatish Balay { \
9880c1aa95SSatish Balay  \
9980c1aa95SSatish Balay     brow = row/bs;  \
10080c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
101ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
10280c1aa95SSatish Balay       bcol = col/bs; \
10380c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
104ab26458aSBarry Smith       low = 0; high = nrow; \
105ab26458aSBarry Smith       while (high-low > 3) { \
106ab26458aSBarry Smith         t = (low+high)/2; \
107ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
108ab26458aSBarry Smith         else              low  = t; \
109ab26458aSBarry Smith       } \
110ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
11180c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
11280c1aa95SSatish Balay         if (rp[_i] == bcol) { \
11380c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
114eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
115eada6651SSatish Balay           else                    *bap  = value;  \
116ac7a638eSSatish Balay           goto a_noinsert; \
11780c1aa95SSatish Balay         } \
11880c1aa95SSatish Balay       } \
11989280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
120a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
12180c1aa95SSatish Balay       if (nrow >= rmax) { \
12280c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
12380c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1243eda8832SBarry Smith         MatScalar *new_a; \
12580c1aa95SSatish Balay  \
126a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
12789280ab3SLois Curfman McInnes  \
12880c1aa95SSatish Balay         /* malloc new storage space */ \
1293eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
1303eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
13180c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
13280c1aa95SSatish Balay         new_i   = new_j + new_nz; \
13380c1aa95SSatish Balay  \
13480c1aa95SSatish Balay         /* copy over old data into new slots */ \
13580c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
13680c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
137549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
13880c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1393eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1403eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
142549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1433eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
14480c1aa95SSatish Balay         /* free up old matrix storage */ \
145606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
146606d414cSSatish Balay         if (!a->singlemalloc) { \
147606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
148606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
149606d414cSSatish Balay         } \
15080c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1517c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
15280c1aa95SSatish Balay  \
15380c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
154ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
1553eda8832SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
15680c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
15780c1aa95SSatish Balay         a->reallocs++; \
15880c1aa95SSatish Balay         a->nz++; \
15980c1aa95SSatish Balay       } \
16080c1aa95SSatish Balay       N = nrow++ - 1;  \
16180c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
16280c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
16380c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1643eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
16580c1aa95SSatish Balay       } \
1663eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
16780c1aa95SSatish Balay       rp[_i]                      = bcol;  \
16880c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
169ac7a638eSSatish Balay       a_noinsert:; \
17080c1aa95SSatish Balay     ailen[brow] = nrow; \
17180c1aa95SSatish Balay }
17257b952d6SSatish Balay 
173ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
174ac7a638eSSatish Balay { \
175ac7a638eSSatish Balay     brow = row/bs;  \
176ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
177ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
178ac7a638eSSatish Balay       bcol = col/bs; \
179ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
180ac7a638eSSatish Balay       low = 0; high = nrow; \
181ac7a638eSSatish Balay       while (high-low > 3) { \
182ac7a638eSSatish Balay         t = (low+high)/2; \
183ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
184ac7a638eSSatish Balay         else              low  = t; \
185ac7a638eSSatish Balay       } \
186ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
187ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
188ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
189ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
190ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
191ac7a638eSSatish Balay           else                    *bap  = value;  \
192ac7a638eSSatish Balay           goto b_noinsert; \
193ac7a638eSSatish Balay         } \
194ac7a638eSSatish Balay       } \
19589280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
196a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
197ac7a638eSSatish Balay       if (nrow >= rmax) { \
198ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
19974c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2003eda8832SBarry Smith         MatScalar *new_a; \
201ac7a638eSSatish Balay  \
202a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
20389280ab3SLois Curfman McInnes  \
204ac7a638eSSatish Balay         /* malloc new storage space */ \
2053eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
2063eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
207ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
208ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
209ac7a638eSSatish Balay  \
210ac7a638eSSatish Balay         /* copy over old data into new slots */ \
211ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
21274c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
213549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
214ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2153eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2163eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2173eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
218549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2193eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
220ac7a638eSSatish Balay         /* free up old matrix storage */ \
221606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
222606d414cSSatish Balay         if (!b->singlemalloc) { \
223606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
224606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
225606d414cSSatish Balay         } \
22674c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2277c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
228ac7a638eSSatish Balay  \
229ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
230ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
2313eda8832SBarry Smith         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
23274c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
23374c639caSSatish Balay         b->reallocs++; \
23474c639caSSatish Balay         b->nz++; \
235ac7a638eSSatish Balay       } \
236ac7a638eSSatish Balay       N = nrow++ - 1;  \
237ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
238ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
239ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2403eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
241ac7a638eSSatish Balay       } \
2423eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
243ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
244ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
245ac7a638eSSatish Balay       b_noinsert:; \
246ac7a638eSSatish Balay     bilen[brow] = nrow; \
247ac7a638eSSatish Balay }
248ac7a638eSSatish Balay 
24993fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2505615d1e5SSatish Balay #undef __FUNC__
251b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
252ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
25357b952d6SSatish Balay {
25493fea6afSBarry Smith   int       ierr,i,N = m*n;
25593fea6afSBarry Smith   MatScalar *vsingle = (MatScalar*)v;
25693fea6afSBarry Smith 
25793fea6afSBarry Smith   PetscFunctionBegin;
25893fea6afSBarry Smith   for (i=0; i<N; i++) {
25993fea6afSBarry Smith     vsingle[i] = v[i];
26093fea6afSBarry Smith   }
26193fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
26293fea6afSBarry Smith   PetscFunctionReturn(0);
26393fea6afSBarry Smith }
26493fea6afSBarry Smith 
26593fea6afSBarry Smith #undef __FUNC__
26693fea6afSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
26793fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
26893fea6afSBarry Smith {
26993fea6afSBarry Smith   int       ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
27093fea6afSBarry Smith   MatScalar *vsingle = (MatScalar*)v;
27193fea6afSBarry Smith 
27293fea6afSBarry Smith   PetscFunctionBegin;
27393fea6afSBarry Smith   for (i=0; i<N; i++) {
27493fea6afSBarry Smith     vsingle[i] = v[i];
27593fea6afSBarry Smith   }
27693fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
27793fea6afSBarry Smith   PetscFunctionReturn(0);
27893fea6afSBarry Smith }
27993fea6afSBarry Smith #endif
28093fea6afSBarry Smith 
28193fea6afSBarry Smith #undef __FUNC__
28293fea6afSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
28393fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
28493fea6afSBarry Smith {
28557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
28693fea6afSBarry Smith   MatScalar   value;
2874fa0d573SSatish Balay   int         ierr,i,j,row,col;
2884fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2894fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2904fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
29157b952d6SSatish Balay 
292eada6651SSatish Balay   /* Some Variables required in the macro */
29380c1aa95SSatish Balay   Mat         A = baij->A;
29480c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
295ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
2963eda8832SBarry Smith   MatScalar   *aa=a->a;
297ac7a638eSSatish Balay 
298ac7a638eSSatish Balay   Mat         B = baij->B;
299ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
300ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3013eda8832SBarry Smith   MatScalar   *ba=b->a;
302ac7a638eSSatish Balay 
303ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
304ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3053eda8832SBarry Smith   MatScalar   *ap,*bap;
30680c1aa95SSatish Balay 
307d64ed03dSBarry Smith   PetscFunctionBegin;
30857b952d6SSatish Balay   for (i=0; i<m; i++) {
3095ef9f2a5SBarry Smith     if (im[i] < 0) continue;
310aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
311a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
312639f9d9dSBarry Smith #endif
31357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
31457b952d6SSatish Balay       row = im[i] - rstart_orig;
31557b952d6SSatish Balay       for (j=0; j<n; j++) {
31657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
31757b952d6SSatish Balay           col = in[j] - cstart_orig;
31857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
319f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
32080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
32173959e64SBarry Smith         } else if (in[j] < 0) continue;
322aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
323a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
324639f9d9dSBarry Smith #endif
32557b952d6SSatish Balay         else {
32657b952d6SSatish Balay           if (mat->was_assembled) {
327905e6a2fSBarry Smith             if (!baij->colmap) {
328905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
329905e6a2fSBarry Smith             }
330aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3310f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
332fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
33348e59246SSatish Balay #else
334905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
33548e59246SSatish Balay #endif
33657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
33757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
33857b952d6SSatish Balay               col =  in[j];
3399bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
3409bf004c3SSatish Balay               B = baij->B;
3419bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
3429bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
3439bf004c3SSatish Balay               ba=b->a;
34457b952d6SSatish Balay             }
345d64ed03dSBarry Smith           } else col = in[j];
34657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
347ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
348ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
34957b952d6SSatish Balay         }
35057b952d6SSatish Balay       }
351d64ed03dSBarry Smith     } else {
35290f02eecSBarry Smith       if (!baij->donotstash) {
353ff2fd236SBarry Smith         if (roworiented) {
3548798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
355ff2fd236SBarry Smith         } else {
3568798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
35757b952d6SSatish Balay         }
35857b952d6SSatish Balay       }
35957b952d6SSatish Balay     }
36090f02eecSBarry Smith   }
3613a40ed3dSBarry Smith   PetscFunctionReturn(0);
36257b952d6SSatish Balay }
36357b952d6SSatish Balay 
364ab26458aSBarry Smith #undef __FUNC__
365b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
36693fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
367ab26458aSBarry Smith {
368ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
36993fea6afSBarry Smith   MatScalar   *value,*barray=baij->barray;
3707ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
371ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
372ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
373ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
374ab26458aSBarry Smith 
375b16ae2b1SBarry Smith   PetscFunctionBegin;
37630793edcSSatish Balay   if(!barray) {
37793fea6afSBarry Smith     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
37830793edcSSatish Balay   }
37930793edcSSatish Balay 
380ab26458aSBarry Smith   if (roworiented) {
381ab26458aSBarry Smith     stepval = (n-1)*bs;
382ab26458aSBarry Smith   } else {
383ab26458aSBarry Smith     stepval = (m-1)*bs;
384ab26458aSBarry Smith   }
385ab26458aSBarry Smith   for (i=0; i<m; i++) {
3865ef9f2a5SBarry Smith     if (im[i] < 0) continue;
387aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
3885ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
389ab26458aSBarry Smith #endif
390ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
391ab26458aSBarry Smith       row = im[i] - rstart;
392ab26458aSBarry Smith       for (j=0; j<n; j++) {
39315b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
39415b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
39515b57d14SSatish Balay           barray = v + i*bs2;
39615b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
39715b57d14SSatish Balay           barray = v + j*bs2;
39815b57d14SSatish Balay         } else { /* Here a copy is required */
399ab26458aSBarry Smith           if (roworiented) {
400ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
401ab26458aSBarry Smith           } else {
402ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
403abef11f7SSatish Balay           }
40447513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
40547513183SBarry Smith             for (jj=0; jj<bs; jj++) {
40630793edcSSatish Balay               *barray++  = *value++;
40747513183SBarry Smith             }
40847513183SBarry Smith           }
40930793edcSSatish Balay           barray -=bs2;
41015b57d14SSatish Balay         }
411abef11f7SSatish Balay 
412abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
413abef11f7SSatish Balay           col  = in[j] - cstart;
41493fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
415ab26458aSBarry Smith         }
4165ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
417aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4185ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
419ab26458aSBarry Smith #endif
420ab26458aSBarry Smith         else {
421ab26458aSBarry Smith           if (mat->was_assembled) {
422ab26458aSBarry Smith             if (!baij->colmap) {
423ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
424ab26458aSBarry Smith             }
425a5eb4965SSatish Balay 
426aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
427aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
428fa46199cSSatish Balay             { int data;
4290f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4300f5bd95cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
431fa46199cSSatish Balay             }
43248e59246SSatish Balay #else
4330f5bd95cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
434a5eb4965SSatish Balay #endif
43548e59246SSatish Balay #endif
436aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4370f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
438fa46199cSSatish Balay             col  = (col - 1)/bs;
43948e59246SSatish Balay #else
440a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
44148e59246SSatish Balay #endif
442ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
443ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
444ab26458aSBarry Smith               col =  in[j];
445ab26458aSBarry Smith             }
446ab26458aSBarry Smith           }
447ab26458aSBarry Smith           else col = in[j];
44893fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
449ab26458aSBarry Smith         }
450ab26458aSBarry Smith       }
451d64ed03dSBarry Smith     } else {
452ab26458aSBarry Smith       if (!baij->donotstash) {
453ff2fd236SBarry Smith         if (roworiented) {
4548798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
455ff2fd236SBarry Smith         } else {
4568798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
457ff2fd236SBarry Smith         }
458abef11f7SSatish Balay       }
459ab26458aSBarry Smith     }
460ab26458aSBarry Smith   }
4613a40ed3dSBarry Smith   PetscFunctionReturn(0);
462ab26458aSBarry Smith }
4630bdbc534SSatish Balay #define HASH_KEY 0.6180339887
464c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
465c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
466c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4675615d1e5SSatish Balay #undef __FUNC__
468b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
4690bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4700bdbc534SSatish Balay {
4710bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
4720bdbc534SSatish Balay   int         ierr,i,j,row,col;
4730bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
474c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
475c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
476329f5518SBarry Smith   PetscReal   tmp;
4773eda8832SBarry Smith   MatScalar   ** HD = baij->hd,value;
478aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4794a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4804a15367fSSatish Balay #endif
4810bdbc534SSatish Balay 
4820bdbc534SSatish Balay   PetscFunctionBegin;
4830bdbc534SSatish Balay 
4840bdbc534SSatish Balay   for (i=0; i<m; i++) {
485aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4860bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4870bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4880bdbc534SSatish Balay #endif
4890bdbc534SSatish Balay       row = im[i];
490c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4910bdbc534SSatish Balay       for (j=0; j<n; j++) {
4920bdbc534SSatish Balay         col = in[j];
4933eda8832SBarry Smith         if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
4940bdbc534SSatish Balay         /* Look up into the Hash Table */
495c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
496c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4970bdbc534SSatish Balay 
498c2760754SSatish Balay 
499c2760754SSatish Balay         idx = h1;
500aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
501187ce0cbSSatish Balay         insert_ct++;
502187ce0cbSSatish Balay         total_ct++;
503187ce0cbSSatish Balay         if (HT[idx] != key) {
504187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505187ce0cbSSatish Balay           if (idx == size) {
506187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507187ce0cbSSatish Balay             if (idx == h1) {
508187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
509187ce0cbSSatish Balay             }
510187ce0cbSSatish Balay           }
511187ce0cbSSatish Balay         }
512187ce0cbSSatish Balay #else
513c2760754SSatish Balay         if (HT[idx] != key) {
514c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
515c2760754SSatish Balay           if (idx == size) {
516c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
517c2760754SSatish Balay             if (idx == h1) {
518c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
519c2760754SSatish Balay             }
520c2760754SSatish Balay           }
521c2760754SSatish Balay         }
522187ce0cbSSatish Balay #endif
523c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
524c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
525c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5260bdbc534SSatish Balay       }
5270bdbc534SSatish Balay     } else {
5280bdbc534SSatish Balay       if (!baij->donotstash) {
529ff2fd236SBarry Smith         if (roworiented) {
5308798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
531ff2fd236SBarry Smith         } else {
5328798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5330bdbc534SSatish Balay         }
5340bdbc534SSatish Balay       }
5350bdbc534SSatish Balay     }
5360bdbc534SSatish Balay   }
537aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
538187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
539187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
540187ce0cbSSatish Balay #endif
5410bdbc534SSatish Balay   PetscFunctionReturn(0);
5420bdbc534SSatish Balay }
5430bdbc534SSatish Balay 
5440bdbc534SSatish Balay #undef __FUNC__
545b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
5460bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
5470bdbc534SSatish Balay {
5480bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
5498798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
5500bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
551b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
552c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
553329f5518SBarry Smith   PetscReal   tmp;
5543eda8832SBarry Smith   MatScalar   **HD = baij->hd,*baij_a;
5553eda8832SBarry Smith   Scalar      *v_t,*value;
556aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5574a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5584a15367fSSatish Balay #endif
5590bdbc534SSatish Balay 
560d0a41580SSatish Balay   PetscFunctionBegin;
561d0a41580SSatish Balay 
5620bdbc534SSatish Balay   if (roworiented) {
5630bdbc534SSatish Balay     stepval = (n-1)*bs;
5640bdbc534SSatish Balay   } else {
5650bdbc534SSatish Balay     stepval = (m-1)*bs;
5660bdbc534SSatish Balay   }
5670bdbc534SSatish Balay   for (i=0; i<m; i++) {
568aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5690bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5700bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5710bdbc534SSatish Balay #endif
5720bdbc534SSatish Balay     row   = im[i];
573187ce0cbSSatish Balay     v_t   = v + i*bs2;
574c2760754SSatish Balay     if (row >= rstart && row < rend) {
5750bdbc534SSatish Balay       for (j=0; j<n; j++) {
5760bdbc534SSatish Balay         col = in[j];
5770bdbc534SSatish Balay 
5780bdbc534SSatish Balay         /* Look up into the Hash Table */
579c2760754SSatish Balay         key = row*Nbs+col+1;
580c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5810bdbc534SSatish Balay 
582c2760754SSatish Balay         idx = h1;
583aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
584187ce0cbSSatish Balay         total_ct++;
585187ce0cbSSatish Balay         insert_ct++;
586187ce0cbSSatish Balay        if (HT[idx] != key) {
587187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
588187ce0cbSSatish Balay           if (idx == size) {
589187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
590187ce0cbSSatish Balay             if (idx == h1) {
591187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
592187ce0cbSSatish Balay             }
593187ce0cbSSatish Balay           }
594187ce0cbSSatish Balay         }
595187ce0cbSSatish Balay #else
596c2760754SSatish Balay         if (HT[idx] != key) {
597c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
598c2760754SSatish Balay           if (idx == size) {
599c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
600c2760754SSatish Balay             if (idx == h1) {
601c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
602c2760754SSatish Balay             }
603c2760754SSatish Balay           }
604c2760754SSatish Balay         }
605187ce0cbSSatish Balay #endif
606c2760754SSatish Balay         baij_a = HD[idx];
6070bdbc534SSatish Balay         if (roworiented) {
608c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
609187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
610187ce0cbSSatish Balay           value = v_t;
611187ce0cbSSatish Balay           v_t  += bs;
612fef45726SSatish Balay           if (addv == ADD_VALUES) {
613c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
614c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
615fef45726SSatish Balay                 baij_a[jj]  += *value++;
616b4cc0f5aSSatish Balay               }
617b4cc0f5aSSatish Balay             }
618fef45726SSatish Balay           } else {
619c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
620c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
621fef45726SSatish Balay                 baij_a[jj]  = *value++;
622fef45726SSatish Balay               }
623fef45726SSatish Balay             }
624fef45726SSatish Balay           }
6250bdbc534SSatish Balay         } else {
6260bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
627fef45726SSatish Balay           if (addv == ADD_VALUES) {
628b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
6290bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
630fef45726SSatish Balay                 baij_a[jj]  += *value++;
631fef45726SSatish Balay               }
632fef45726SSatish Balay             }
633fef45726SSatish Balay           } else {
634fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
635fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
636fef45726SSatish Balay                 baij_a[jj]  = *value++;
637fef45726SSatish Balay               }
638b4cc0f5aSSatish Balay             }
6390bdbc534SSatish Balay           }
6400bdbc534SSatish Balay         }
6410bdbc534SSatish Balay       }
6420bdbc534SSatish Balay     } else {
6430bdbc534SSatish Balay       if (!baij->donotstash) {
6440bdbc534SSatish Balay         if (roworiented) {
6458798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6460bdbc534SSatish Balay         } else {
6478798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6480bdbc534SSatish Balay         }
6490bdbc534SSatish Balay       }
6500bdbc534SSatish Balay     }
6510bdbc534SSatish Balay   }
652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
653187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
654187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
655187ce0cbSSatish Balay #endif
6560bdbc534SSatish Balay   PetscFunctionReturn(0);
6570bdbc534SSatish Balay }
658133cdb44SSatish Balay 
6590bdbc534SSatish Balay #undef __FUNC__
660b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
661ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
662d6de1c52SSatish Balay {
663d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
664d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
66548e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
666d6de1c52SSatish Balay 
667133cdb44SSatish Balay   PetscFunctionBegin;
668d6de1c52SSatish Balay   for (i=0; i<m; i++) {
669a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
670a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
672d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
673d6de1c52SSatish Balay       for (j=0; j<n; j++) {
674a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
675a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
676d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
677d6de1c52SSatish Balay           col = idxn[j] - bscstart;
67898dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
679d64ed03dSBarry Smith         } else {
680905e6a2fSBarry Smith           if (!baij->colmap) {
681905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
682905e6a2fSBarry Smith           }
683aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
6840f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
685fa46199cSSatish Balay           data --;
68648e59246SSatish Balay #else
68748e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
68848e59246SSatish Balay #endif
68948e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
690d9d09a02SSatish Balay           else {
69148e59246SSatish Balay             col  = data + idxn[j]%bs;
69298dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
693d6de1c52SSatish Balay           }
694d6de1c52SSatish Balay         }
695d6de1c52SSatish Balay       }
696d64ed03dSBarry Smith     } else {
697a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
698d6de1c52SSatish Balay     }
699d6de1c52SSatish Balay   }
7003a40ed3dSBarry Smith  PetscFunctionReturn(0);
701d6de1c52SSatish Balay }
702d6de1c52SSatish Balay 
7035615d1e5SSatish Balay #undef __FUNC__
704b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
705329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
706d6de1c52SSatish Balay {
707d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
708d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
709acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
710329f5518SBarry Smith   PetscReal  sum = 0.0;
7113eda8832SBarry Smith   MatScalar  *v;
712d6de1c52SSatish Balay 
713d64ed03dSBarry Smith   PetscFunctionBegin;
714d6de1c52SSatish Balay   if (baij->size == 1) {
715d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
716d6de1c52SSatish Balay   } else {
717d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
718d6de1c52SSatish Balay       v = amat->a;
719d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
720aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
721329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
722d6de1c52SSatish Balay #else
723d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
724d6de1c52SSatish Balay #endif
725d6de1c52SSatish Balay       }
726d6de1c52SSatish Balay       v = bmat->a;
727d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
728aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
729329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
730d6de1c52SSatish Balay #else
731d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
732d6de1c52SSatish Balay #endif
733d6de1c52SSatish Balay       }
734ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
735d6de1c52SSatish Balay       *norm = sqrt(*norm);
736d64ed03dSBarry Smith     } else {
737e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
738d6de1c52SSatish Balay     }
739d64ed03dSBarry Smith   }
7403a40ed3dSBarry Smith   PetscFunctionReturn(0);
741d6de1c52SSatish Balay }
74257b952d6SSatish Balay 
743bd7f49f5SSatish Balay 
744fef45726SSatish Balay /*
745fef45726SSatish Balay   Creates the hash table, and sets the table
746fef45726SSatish Balay   This table is created only once.
747fef45726SSatish Balay   If new entried need to be added to the matrix
748fef45726SSatish Balay   then the hash table has to be destroyed and
749fef45726SSatish Balay   recreated.
750fef45726SSatish Balay */
751fef45726SSatish Balay #undef __FUNC__
752b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
753329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
754596b8d2eSBarry Smith {
755596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
756596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
757596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
7580bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
759549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
760187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
761fef45726SSatish Balay   int         *HT,key;
7623eda8832SBarry Smith   MatScalar   **HD;
763329f5518SBarry Smith   PetscReal   tmp;
764aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
7654a15367fSSatish Balay   int         ct=0,max=0;
7664a15367fSSatish Balay #endif
767fef45726SSatish Balay 
768d64ed03dSBarry Smith   PetscFunctionBegin;
7690bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7700bdbc534SSatish Balay   size = baij->ht_size;
771fef45726SSatish Balay 
7720bdbc534SSatish Balay   if (baij->ht) {
7730bdbc534SSatish Balay     PetscFunctionReturn(0);
774596b8d2eSBarry Smith   }
7750bdbc534SSatish Balay 
776fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7773eda8832SBarry Smith   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
778b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
779b9e4cc15SSatish Balay   HD = baij->hd;
780a07cd24cSSatish Balay   HT = baij->ht;
781b9e4cc15SSatish Balay 
782b9e4cc15SSatish Balay 
783549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
7840bdbc534SSatish Balay 
785596b8d2eSBarry Smith 
786596b8d2eSBarry Smith   /* Loop Over A */
7870bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
788596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
7890bdbc534SSatish Balay       row = i+rstart;
7900bdbc534SSatish Balay       col = aj[j]+cstart;
791596b8d2eSBarry Smith 
792187ce0cbSSatish Balay       key = row*Nbs + col + 1;
793c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7940bdbc534SSatish Balay       for (k=0; k<size; k++){
7950bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7960bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7970bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
798596b8d2eSBarry Smith           break;
799aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
800187ce0cbSSatish Balay         } else {
801187ce0cbSSatish Balay           ct++;
802187ce0cbSSatish Balay #endif
803596b8d2eSBarry Smith         }
804187ce0cbSSatish Balay       }
805aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
806187ce0cbSSatish Balay       if (k> max) max = k;
807187ce0cbSSatish Balay #endif
808596b8d2eSBarry Smith     }
809596b8d2eSBarry Smith   }
810596b8d2eSBarry Smith   /* Loop Over B */
8110bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
812596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8130bdbc534SSatish Balay       row = i+rstart;
8140bdbc534SSatish Balay       col = garray[bj[j]];
815187ce0cbSSatish Balay       key = row*Nbs + col + 1;
816c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8170bdbc534SSatish Balay       for (k=0; k<size; k++){
8180bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8190bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8200bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
821596b8d2eSBarry Smith           break;
822aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
823187ce0cbSSatish Balay         } else {
824187ce0cbSSatish Balay           ct++;
825187ce0cbSSatish Balay #endif
826596b8d2eSBarry Smith         }
827187ce0cbSSatish Balay       }
828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
829187ce0cbSSatish Balay       if (k> max) max = k;
830187ce0cbSSatish Balay #endif
831596b8d2eSBarry Smith     }
832596b8d2eSBarry Smith   }
833596b8d2eSBarry Smith 
834596b8d2eSBarry Smith   /* Print Summary */
835aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
836c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
837596b8d2eSBarry Smith     if (HT[i]) {j++;}
838c38d4ed2SBarry Smith   }
839329f5518SBarry Smith   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
840187ce0cbSSatish Balay #endif
8413a40ed3dSBarry Smith   PetscFunctionReturn(0);
842596b8d2eSBarry Smith }
84357b952d6SSatish Balay 
844bbb85fb3SSatish Balay #undef __FUNC__
845b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
846bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847bbb85fb3SSatish Balay {
848bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
850bbb85fb3SSatish Balay   InsertMode  addv;
851bbb85fb3SSatish Balay 
852bbb85fb3SSatish Balay   PetscFunctionBegin;
853bbb85fb3SSatish Balay   if (baij->donotstash) {
854bbb85fb3SSatish Balay     PetscFunctionReturn(0);
855bbb85fb3SSatish Balay   }
856bbb85fb3SSatish Balay 
857bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
858bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
859bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
860bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
861bbb85fb3SSatish Balay   }
862bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
863bbb85fb3SSatish Balay 
8648798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
8658798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
8668798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8675a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
8688798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
8695a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
870bbb85fb3SSatish Balay   PetscFunctionReturn(0);
871bbb85fb3SSatish Balay }
872bbb85fb3SSatish Balay 
873bbb85fb3SSatish Balay #undef __FUNC__
874b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
875bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
876bbb85fb3SSatish Balay {
877bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
878a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
879a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
8807c922b88SBarry Smith   int         *row,*col,other_disassembled;
8817c922b88SBarry Smith   PetscTruth  r1,r2,r3;
8823eda8832SBarry Smith   MatScalar   *val;
883bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
884bbb85fb3SSatish Balay 
885bbb85fb3SSatish Balay   PetscFunctionBegin;
886bbb85fb3SSatish Balay   if (!baij->donotstash) {
887a2d1c673SSatish Balay     while (1) {
8888798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
889a2d1c673SSatish Balay       if (!flg) break;
890a2d1c673SSatish Balay 
891bbb85fb3SSatish Balay       for (i=0; i<n;) {
892bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
893bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
894bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
895bbb85fb3SSatish Balay         else       ncols = n-i;
896bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
89793fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
898bbb85fb3SSatish Balay         i = j;
899bbb85fb3SSatish Balay       }
900bbb85fb3SSatish Balay     }
9018798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
902a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
903a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
904a2d1c673SSatish Balay        restore the original flags */
905a2d1c673SSatish Balay     r1 = baij->roworiented;
906a2d1c673SSatish Balay     r2 = a->roworiented;
907a2d1c673SSatish Balay     r3 = b->roworiented;
9087c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
9097c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
9107c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
911a2d1c673SSatish Balay     while (1) {
9128798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
913a2d1c673SSatish Balay       if (!flg) break;
914a2d1c673SSatish Balay 
915a2d1c673SSatish Balay       for (i=0; i<n;) {
916a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
917a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
918a2d1c673SSatish Balay         if (j < n) ncols = j-i;
919a2d1c673SSatish Balay         else       ncols = n-i;
92093fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
921a2d1c673SSatish Balay         i = j;
922a2d1c673SSatish Balay       }
923a2d1c673SSatish Balay     }
9248798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
925a2d1c673SSatish Balay     baij->roworiented = r1;
926a2d1c673SSatish Balay     a->roworiented    = r2;
927a2d1c673SSatish Balay     b->roworiented    = r3;
928bbb85fb3SSatish Balay   }
929bbb85fb3SSatish Balay 
930bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932bbb85fb3SSatish Balay 
933bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
934bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
935bbb85fb3SSatish Balay   /*
936bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
937bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
938bbb85fb3SSatish Balay   */
939bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
940bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
941bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
942bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943bbb85fb3SSatish Balay     }
944bbb85fb3SSatish Balay   }
945bbb85fb3SSatish Balay 
946bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948bbb85fb3SSatish Balay   }
949bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951bbb85fb3SSatish Balay 
952aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
953bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954c38d4ed2SBarry Smith     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
955bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
956bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay #endif
959bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
962bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
963bbb85fb3SSatish Balay   }
964bbb85fb3SSatish Balay 
965606d414cSSatish Balay   if (baij->rowvalues) {
966606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
967606d414cSSatish Balay     baij->rowvalues = 0;
968606d414cSSatish Balay   }
969bbb85fb3SSatish Balay   PetscFunctionReturn(0);
970bbb85fb3SSatish Balay }
97157b952d6SSatish Balay 
9725615d1e5SSatish Balay #undef __FUNC__
973b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
9747b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
97557b952d6SSatish Balay {
97657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
97777ed5343SBarry Smith   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
9786831982aSBarry Smith   PetscTruth   isascii,isdraw;
97957b952d6SSatish Balay 
980d64ed03dSBarry Smith   PetscFunctionBegin;
9816831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
9826831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
9830f5bd95cSBarry Smith   if (isascii) {
984d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
985639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9864e220ebcSLois Curfman McInnes       MatInfo info;
987d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
988d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
9896831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9904e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
9916831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
992d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
9936831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
994d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
9956831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
9966831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
99757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
9983a40ed3dSBarry Smith       PetscFunctionReturn(0);
999d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
10006831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
10013a40ed3dSBarry Smith       PetscFunctionReturn(0);
100257b952d6SSatish Balay     }
100357b952d6SSatish Balay   }
100457b952d6SSatish Balay 
10050f5bd95cSBarry Smith   if (isdraw) {
100657b952d6SSatish Balay     Draw       draw;
100757b952d6SSatish Balay     PetscTruth isnull;
100877ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
10093a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
101057b952d6SSatish Balay   }
101157b952d6SSatish Balay 
101257b952d6SSatish Balay   if (size == 1) {
101357b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1014d64ed03dSBarry Smith   } else {
101557b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
101657b952d6SSatish Balay     Mat         A;
101757b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
10183eda8832SBarry Smith     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10193eda8832SBarry Smith     MatScalar   *a;
102057b952d6SSatish Balay 
102157b952d6SSatish Balay     if (!rank) {
102255843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1023d64ed03dSBarry Smith     } else {
102455843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
102557b952d6SSatish Balay     }
102657b952d6SSatish Balay     PLogObjectParent(mat,A);
102757b952d6SSatish Balay 
102857b952d6SSatish Balay     /* copy over the A part */
102957b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
103057b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
103157b952d6SSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
103257b952d6SSatish Balay 
103357b952d6SSatish Balay     for (i=0; i<mbs; i++) {
103457b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
103557b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
103657b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
103757b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
103857b952d6SSatish Balay         for (k=0; k<bs; k++) {
103993fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040cee3aa6bSSatish Balay           col++; a += bs;
104157b952d6SSatish Balay         }
104257b952d6SSatish Balay       }
104357b952d6SSatish Balay     }
104457b952d6SSatish Balay     /* copy over the B part */
104557b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
104657b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
104757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
104857b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
104957b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
105057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
105157b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
105257b952d6SSatish Balay         for (k=0; k<bs; k++) {
105393fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054cee3aa6bSSatish Balay           col++; a += bs;
105557b952d6SSatish Balay         }
105657b952d6SSatish Balay       }
105757b952d6SSatish Balay     }
1058606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
10596d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
10606d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
106155843e3eSBarry Smith     /*
106255843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
106355843e3eSBarry Smith        synchronized across all processors that share the Draw object
106455843e3eSBarry Smith     */
10656831982aSBarry Smith     if (!rank) {
10666831982aSBarry Smith       Viewer sviewer;
10676831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
10686831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
10696831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
107057b952d6SSatish Balay     }
107157b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
107257b952d6SSatish Balay   }
10733a40ed3dSBarry Smith   PetscFunctionReturn(0);
107457b952d6SSatish Balay }
107557b952d6SSatish Balay 
10765615d1e5SSatish Balay #undef __FUNC__
1077b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1078e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
107957b952d6SSatish Balay {
108057b952d6SSatish Balay   int        ierr;
10816831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
108257b952d6SSatish Balay 
1083d64ed03dSBarry Smith   PetscFunctionBegin;
10846831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
10856831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10866831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
10876831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
10880f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
10897b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
10905cd90555SBarry Smith   } else {
10910f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
109257b952d6SSatish Balay   }
10933a40ed3dSBarry Smith   PetscFunctionReturn(0);
109457b952d6SSatish Balay }
109557b952d6SSatish Balay 
10965615d1e5SSatish Balay #undef __FUNC__
1097b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1098e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
109979bdfe76SSatish Balay {
110079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
110179bdfe76SSatish Balay   int         ierr;
110279bdfe76SSatish Balay 
1103d64ed03dSBarry Smith   PetscFunctionBegin;
110498dd23e9SBarry Smith 
110598dd23e9SBarry Smith   if (mat->mapping) {
110698dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
110798dd23e9SBarry Smith   }
110898dd23e9SBarry Smith   if (mat->bmapping) {
110998dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
111098dd23e9SBarry Smith   }
111161b13de0SBarry Smith   if (mat->rmap) {
111261b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
111361b13de0SBarry Smith   }
111461b13de0SBarry Smith   if (mat->cmap) {
111561b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
111661b13de0SBarry Smith   }
1117aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1118e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
111979bdfe76SSatish Balay #endif
112079bdfe76SSatish Balay 
11218798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
11228798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
11238798bf22SSatish Balay 
1124606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
112579bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
112679bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1127aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
11280f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
112948e59246SSatish Balay #else
1130606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
113148e59246SSatish Balay #endif
1132606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1133606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1134606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1135606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1136606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1137606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1138606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
113979bdfe76SSatish Balay   PLogObjectDestroy(mat);
114079bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11413a40ed3dSBarry Smith   PetscFunctionReturn(0);
114279bdfe76SSatish Balay }
114379bdfe76SSatish Balay 
11445615d1e5SSatish Balay #undef __FUNC__
1145b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1146ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1147cee3aa6bSSatish Balay {
1148cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
114947b4a8eaSLois Curfman McInnes   int         ierr,nt;
1150cee3aa6bSSatish Balay 
1151d64ed03dSBarry Smith   PetscFunctionBegin;
1152e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
115347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1154a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
115547b4a8eaSLois Curfman McInnes   }
1156e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
115747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1158a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
115947b4a8eaSLois Curfman McInnes   }
116043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1161f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
116243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1163f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
116443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11653a40ed3dSBarry Smith   PetscFunctionReturn(0);
1166cee3aa6bSSatish Balay }
1167cee3aa6bSSatish Balay 
11685615d1e5SSatish Balay #undef __FUNC__
1169b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1170ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1171cee3aa6bSSatish Balay {
1172cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1173cee3aa6bSSatish Balay   int        ierr;
1174d64ed03dSBarry Smith 
1175d64ed03dSBarry Smith   PetscFunctionBegin;
117643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1177f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
117843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1179f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
11803a40ed3dSBarry Smith   PetscFunctionReturn(0);
1181cee3aa6bSSatish Balay }
1182cee3aa6bSSatish Balay 
11835615d1e5SSatish Balay #undef __FUNC__
1184b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
11857c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1186cee3aa6bSSatish Balay {
1187cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1188cee3aa6bSSatish Balay   int         ierr;
1189cee3aa6bSSatish Balay 
1190d64ed03dSBarry Smith   PetscFunctionBegin;
1191cee3aa6bSSatish Balay   /* do nondiagonal part */
11927c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1193cee3aa6bSSatish Balay   /* send it on its way */
1194537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1195cee3aa6bSSatish Balay   /* do local part */
11967c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1197cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1198cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1199cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1200639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12013a40ed3dSBarry Smith   PetscFunctionReturn(0);
1202cee3aa6bSSatish Balay }
1203cee3aa6bSSatish Balay 
12045615d1e5SSatish Balay #undef __FUNC__
1205b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
12067c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1207cee3aa6bSSatish Balay {
1208cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1209cee3aa6bSSatish Balay   int         ierr;
1210cee3aa6bSSatish Balay 
1211d64ed03dSBarry Smith   PetscFunctionBegin;
1212cee3aa6bSSatish Balay   /* do nondiagonal part */
12137c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1214cee3aa6bSSatish Balay   /* send it on its way */
1215537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1216cee3aa6bSSatish Balay   /* do local part */
12177c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1218cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1219cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1220cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1221537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1223cee3aa6bSSatish Balay }
1224cee3aa6bSSatish Balay 
1225cee3aa6bSSatish Balay /*
1226cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1227cee3aa6bSSatish Balay    diagonal block
1228cee3aa6bSSatish Balay */
12295615d1e5SSatish Balay #undef __FUNC__
1230b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1231ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1232cee3aa6bSSatish Balay {
1233cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
12343a40ed3dSBarry Smith   int         ierr;
1235d64ed03dSBarry Smith 
1236d64ed03dSBarry Smith   PetscFunctionBegin;
1237a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12383a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12393a40ed3dSBarry Smith   PetscFunctionReturn(0);
1240cee3aa6bSSatish Balay }
1241cee3aa6bSSatish Balay 
12425615d1e5SSatish Balay #undef __FUNC__
1243b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1244ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1245cee3aa6bSSatish Balay {
1246cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1247cee3aa6bSSatish Balay   int         ierr;
1248d64ed03dSBarry Smith 
1249d64ed03dSBarry Smith   PetscFunctionBegin;
1250cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1251cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
12523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1253cee3aa6bSSatish Balay }
1254026e39d0SSatish Balay 
12555615d1e5SSatish Balay #undef __FUNC__
1256b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1257ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
125857b952d6SSatish Balay {
125957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1260d64ed03dSBarry Smith 
1261d64ed03dSBarry Smith   PetscFunctionBegin;
1262bd7f49f5SSatish Balay   if (m) *m = mat->M;
1263bd7f49f5SSatish Balay   if (n) *n = mat->N;
12643a40ed3dSBarry Smith   PetscFunctionReturn(0);
126557b952d6SSatish Balay }
126657b952d6SSatish Balay 
12675615d1e5SSatish Balay #undef __FUNC__
1268b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1269ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
127057b952d6SSatish Balay {
127157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1272d64ed03dSBarry Smith 
1273d64ed03dSBarry Smith   PetscFunctionBegin;
1274f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12753a40ed3dSBarry Smith   PetscFunctionReturn(0);
127657b952d6SSatish Balay }
127757b952d6SSatish Balay 
12785615d1e5SSatish Balay #undef __FUNC__
1279b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1280ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
128157b952d6SSatish Balay {
128257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1283d64ed03dSBarry Smith 
1284d64ed03dSBarry Smith   PetscFunctionBegin;
1285a21fb8cbSBarry Smith   if (m) *m = mat->rstart*mat->bs;
1286a21fb8cbSBarry Smith   if (n) *n = mat->rend*mat->bs;
12873a40ed3dSBarry Smith   PetscFunctionReturn(0);
128857b952d6SSatish Balay }
128957b952d6SSatish Balay 
12905615d1e5SSatish Balay #undef __FUNC__
1291b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1292acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1293acdf5bf4SSatish Balay {
1294acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1295acdf5bf4SSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1296acdf5bf4SSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1297d9d09a02SSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1298d9d09a02SSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1299acdf5bf4SSatish Balay 
1300d64ed03dSBarry Smith   PetscFunctionBegin;
1301a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1302acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1303acdf5bf4SSatish Balay 
1304acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1305acdf5bf4SSatish Balay     /*
1306acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1307acdf5bf4SSatish Balay     */
1308acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1309bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1310bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1311acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1312acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1313acdf5bf4SSatish Balay     }
1314549d3d68SSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1315acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1316acdf5bf4SSatish Balay   }
1317acdf5bf4SSatish Balay 
1318a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1319d9d09a02SSatish Balay   lrow = row - brstart;
1320acdf5bf4SSatish Balay 
1321acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1322acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1323acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1324f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1325f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1326acdf5bf4SSatish Balay   nztot = nzA + nzB;
1327acdf5bf4SSatish Balay 
1328acdf5bf4SSatish Balay   cmap  = mat->garray;
1329acdf5bf4SSatish Balay   if (v  || idx) {
1330acdf5bf4SSatish Balay     if (nztot) {
1331acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1332acdf5bf4SSatish Balay       int imark = -1;
1333acdf5bf4SSatish Balay       if (v) {
1334acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1335acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1336d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1337acdf5bf4SSatish Balay           else break;
1338acdf5bf4SSatish Balay         }
1339acdf5bf4SSatish Balay         imark = i;
1340acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1341acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1342acdf5bf4SSatish Balay       }
1343acdf5bf4SSatish Balay       if (idx) {
1344acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1345acdf5bf4SSatish Balay         if (imark > -1) {
1346acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1347bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1348acdf5bf4SSatish Balay           }
1349acdf5bf4SSatish Balay         } else {
1350acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1351d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1352d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1353acdf5bf4SSatish Balay             else break;
1354acdf5bf4SSatish Balay           }
1355acdf5bf4SSatish Balay           imark = i;
1356acdf5bf4SSatish Balay         }
1357d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1358d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1359acdf5bf4SSatish Balay       }
1360d64ed03dSBarry Smith     } else {
1361d212a18eSSatish Balay       if (idx) *idx = 0;
1362d212a18eSSatish Balay       if (v)   *v   = 0;
1363d212a18eSSatish Balay     }
1364acdf5bf4SSatish Balay   }
1365acdf5bf4SSatish Balay   *nz = nztot;
1366f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1367f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
13683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1369acdf5bf4SSatish Balay }
1370acdf5bf4SSatish Balay 
13715615d1e5SSatish Balay #undef __FUNC__
1372b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1373acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1374acdf5bf4SSatish Balay {
1375acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1376d64ed03dSBarry Smith 
1377d64ed03dSBarry Smith   PetscFunctionBegin;
1378acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1379a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1380acdf5bf4SSatish Balay   }
1381acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13823a40ed3dSBarry Smith   PetscFunctionReturn(0);
1383acdf5bf4SSatish Balay }
1384acdf5bf4SSatish Balay 
13855615d1e5SSatish Balay #undef __FUNC__
1386b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1387ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13885a838052SSatish Balay {
13895a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1390d64ed03dSBarry Smith 
1391d64ed03dSBarry Smith   PetscFunctionBegin;
13925a838052SSatish Balay   *bs = baij->bs;
13933a40ed3dSBarry Smith   PetscFunctionReturn(0);
13945a838052SSatish Balay }
13955a838052SSatish Balay 
13965615d1e5SSatish Balay #undef __FUNC__
1397b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1398ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
139958667388SSatish Balay {
140058667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
140158667388SSatish Balay   int         ierr;
1402d64ed03dSBarry Smith 
1403d64ed03dSBarry Smith   PetscFunctionBegin;
140458667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
140558667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14063a40ed3dSBarry Smith   PetscFunctionReturn(0);
140758667388SSatish Balay }
14080ac07820SSatish Balay 
14095615d1e5SSatish Balay #undef __FUNC__
1410b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1411ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14120ac07820SSatish Balay {
14134e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14144e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14157d57db60SLois Curfman McInnes   int         ierr;
1416329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14170ac07820SSatish Balay 
1418d64ed03dSBarry Smith   PetscFunctionBegin;
14194e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14204e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14210e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1422de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14234e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14240e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1425de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14260ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14274e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14284e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14294e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14304e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14314e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14320ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1433f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14344e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14354e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14364e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14374e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14384e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14390ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1440f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14414e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14424e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14434e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14444e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14454e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1446d41123aaSBarry Smith   } else {
1447d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
14480ac07820SSatish Balay   }
14495a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14505a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14515a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14525a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14534e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14544e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14554e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14563a40ed3dSBarry Smith   PetscFunctionReturn(0);
14570ac07820SSatish Balay }
14580ac07820SSatish Balay 
14595615d1e5SSatish Balay #undef __FUNC__
1460b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1461ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
146258667388SSatish Balay {
146358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
146498305bb5SBarry Smith   int         ierr;
146558667388SSatish Balay 
1466d64ed03dSBarry Smith   PetscFunctionBegin;
146758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
146858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14696da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1470c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
14714787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
14727c922b88SBarry Smith       op == MAT_KEEP_ZEROED_ROWS ||
14734787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
147498305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
147598305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1476b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
14777c922b88SBarry Smith         a->roworiented = PETSC_TRUE;
147898305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
147998305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1480b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14816da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
148258667388SSatish Balay              op == MAT_SYMMETRIC ||
148358667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1484b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
148598305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
148658667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
148798305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
14887c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
148998305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
149098305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
14912b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
14927c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
1493d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1494d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1495133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
14967c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
1497d64ed03dSBarry Smith   } else {
1498d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1499d64ed03dSBarry Smith   }
15003a40ed3dSBarry Smith   PetscFunctionReturn(0);
150158667388SSatish Balay }
150258667388SSatish Balay 
15035615d1e5SSatish Balay #undef __FUNC__
1504b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1505ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15060ac07820SSatish Balay {
15070ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15080ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15090ac07820SSatish Balay   Mat         B;
151040011551SBarry Smith   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15110ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15123eda8832SBarry Smith   MatScalar   *a;
15130ac07820SSatish Balay 
1514d64ed03dSBarry Smith   PetscFunctionBegin;
15157c922b88SBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15167c922b88SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15170ac07820SSatish Balay 
15180ac07820SSatish Balay   /* copy over the A part */
15190ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15200ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15210ac07820SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
15220ac07820SSatish Balay 
15230ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15240ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15250ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15260ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15270ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15280ac07820SSatish Balay       for (k=0; k<bs; k++) {
152993fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15300ac07820SSatish Balay         col++; a += bs;
15310ac07820SSatish Balay       }
15320ac07820SSatish Balay     }
15330ac07820SSatish Balay   }
15340ac07820SSatish Balay   /* copy over the B part */
15350ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15360ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15370ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15380ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15390ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15400ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15410ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15420ac07820SSatish Balay       for (k=0; k<bs; k++) {
154393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15440ac07820SSatish Balay         col++; a += bs;
15450ac07820SSatish Balay       }
15460ac07820SSatish Balay     }
15470ac07820SSatish Balay   }
1548606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
15490ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15500ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15510ac07820SSatish Balay 
15527c922b88SBarry Smith   if (matout) {
15530ac07820SSatish Balay     *matout = B;
15540ac07820SSatish Balay   } else {
1555f830108cSBarry Smith     PetscOps *Abops;
1556cc2dc46cSBarry Smith     MatOps   Aops;
1557f830108cSBarry Smith 
15580ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1559606d414cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
15600ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
15610ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1562aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
15630f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1564b1fc9764SSatish Balay #else
1565606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1566b1fc9764SSatish Balay #endif
1567606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1568606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1569606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1570606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1571f830108cSBarry Smith 
1572f830108cSBarry Smith     /*
1573f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1574f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1575f830108cSBarry Smith       else from C.
1576f830108cSBarry Smith     */
1577f830108cSBarry Smith     Abops   = A->bops;
1578f830108cSBarry Smith     Aops    = A->ops;
1579549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1580f830108cSBarry Smith     A->bops = Abops;
1581f830108cSBarry Smith     A->ops  = Aops;
1582f830108cSBarry Smith 
15830ac07820SSatish Balay     PetscHeaderDestroy(B);
15840ac07820SSatish Balay   }
15853a40ed3dSBarry Smith   PetscFunctionReturn(0);
15860ac07820SSatish Balay }
15870e95ebc0SSatish Balay 
15885615d1e5SSatish Balay #undef __FUNC__
1589b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
159036c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
15910e95ebc0SSatish Balay {
159236c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
159336c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
15940e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
15950e95ebc0SSatish Balay 
1596d64ed03dSBarry Smith   PetscFunctionBegin;
159736c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
159836c4a09eSSatish Balay   if (rr) {
159936c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
160036c4a09eSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
160136c4a09eSSatish Balay     /* Overlap communication with computation. */
160236c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
160336c4a09eSSatish Balay   }
16040e95ebc0SSatish Balay   if (ll) {
16050e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
160636c4a09eSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1607a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16080e95ebc0SSatish Balay   }
160936c4a09eSSatish Balay   /* scale  the diagonal block */
161036c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
161136c4a09eSSatish Balay 
161236c4a09eSSatish Balay   if (rr) {
161336c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
161436c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1615a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
161636c4a09eSSatish Balay   }
161736c4a09eSSatish Balay 
16183a40ed3dSBarry Smith   PetscFunctionReturn(0);
16190e95ebc0SSatish Balay }
16200e95ebc0SSatish Balay 
16215615d1e5SSatish Balay #undef __FUNC__
1622b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1623ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16240ac07820SSatish Balay {
16250ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16260ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1627a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16280ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16290ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1630a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16310ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16320ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16330ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16340ac07820SSatish Balay   IS             istmp;
16350ac07820SSatish Balay 
1636d64ed03dSBarry Smith   PetscFunctionBegin;
16370ac07820SSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
16380ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16390ac07820SSatish Balay 
16400ac07820SSatish Balay   /*  first count number of contributors to each processor */
16410ac07820SSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1642549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1643549d3d68SSatish Balay   procs  = nprocs + size;
16440ac07820SSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
16450ac07820SSatish Balay   for (i=0; i<N; i++) {
16460ac07820SSatish Balay     idx   = rows[i];
16470ac07820SSatish Balay     found = 0;
16480ac07820SSatish Balay     for (j=0; j<size; j++) {
16490ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16500ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16510ac07820SSatish Balay       }
16520ac07820SSatish Balay     }
1653a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16540ac07820SSatish Balay   }
16550ac07820SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
16560ac07820SSatish Balay 
16570ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16586831982aSBarry Smith   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
16596831982aSBarry Smith   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
16600ac07820SSatish Balay   nmax   = work[rank];
16616831982aSBarry Smith   nrecvs = work[size+rank];
1662606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
16630ac07820SSatish Balay 
16640ac07820SSatish Balay   /* post receives:   */
1665d64ed03dSBarry Smith   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1666d64ed03dSBarry Smith   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16670ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1668ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16690ac07820SSatish Balay   }
16700ac07820SSatish Balay 
16710ac07820SSatish Balay   /* do sends:
16720ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16730ac07820SSatish Balay      the ith processor
16740ac07820SSatish Balay   */
16750ac07820SSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1676ca161407SBarry Smith   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16770ac07820SSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
16780ac07820SSatish Balay   starts[0]  = 0;
16790ac07820SSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
16800ac07820SSatish Balay   for (i=0; i<N; i++) {
16810ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16820ac07820SSatish Balay   }
16836831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
16840ac07820SSatish Balay 
16850ac07820SSatish Balay   starts[0] = 0;
16860ac07820SSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
16870ac07820SSatish Balay   count = 0;
16880ac07820SSatish Balay   for (i=0; i<size; i++) {
16890ac07820SSatish Balay     if (procs[i]) {
1690ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
16910ac07820SSatish Balay     }
16920ac07820SSatish Balay   }
1693606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
16940ac07820SSatish Balay 
16950ac07820SSatish Balay   base = owners[rank]*bs;
16960ac07820SSatish Balay 
16970ac07820SSatish Balay   /*  wait on receives */
16980ac07820SSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
16990ac07820SSatish Balay   source = lens + nrecvs;
17000ac07820SSatish Balay   count  = nrecvs; slen = 0;
17010ac07820SSatish Balay   while (count) {
1702ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17030ac07820SSatish Balay     /* unpack receives into our local space */
1704ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17050ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17060ac07820SSatish Balay     lens[imdex]    = n;
17070ac07820SSatish Balay     slen          += n;
17080ac07820SSatish Balay     count--;
17090ac07820SSatish Balay   }
1710606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17110ac07820SSatish Balay 
17120ac07820SSatish Balay   /* move the data into the send scatter */
17130ac07820SSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
17140ac07820SSatish Balay   count = 0;
17150ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17160ac07820SSatish Balay     values = rvalues + i*nmax;
17170ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17180ac07820SSatish Balay       lrows[count++] = values[j] - base;
17190ac07820SSatish Balay     }
17200ac07820SSatish Balay   }
1721606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1722606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1723606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1724606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17250ac07820SSatish Balay 
17260ac07820SSatish Balay   /* actually zap the local rows */
1727029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17280ac07820SSatish Balay   PLogObjectParent(A,istmp);
1729a07cd24cSSatish Balay 
173072dacd9aSBarry Smith   /*
173172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
173272dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
173372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
173472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
173572dacd9aSBarry Smith 
173672dacd9aSBarry Smith        Contributed by: Mathew Knepley
173772dacd9aSBarry Smith   */
17389c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17393eda8832SBarry Smith   ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
17409c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17413eda8832SBarry Smith     ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17429c957beeSSatish Balay   } else if (diag) {
17433eda8832SBarry Smith     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1744fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1745fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1746fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17476525c446SSatish Balay     }
1748a07cd24cSSatish Balay     for (i = 0; i < slen; i++) {
1749a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17503eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1751a07cd24cSSatish Balay     }
1752a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17549c957beeSSatish Balay   } else {
17553eda8832SBarry Smith     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1756a07cd24cSSatish Balay   }
17579c957beeSSatish Balay 
17589c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1759606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1760a07cd24cSSatish Balay 
17610ac07820SSatish Balay   /* wait on sends */
17620ac07820SSatish Balay   if (nsends) {
1763d64ed03dSBarry Smith     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1764ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1765606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
17660ac07820SSatish Balay   }
1767606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1768606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
17690ac07820SSatish Balay 
17703a40ed3dSBarry Smith   PetscFunctionReturn(0);
17710ac07820SSatish Balay }
177272dacd9aSBarry Smith 
17735615d1e5SSatish Balay #undef __FUNC__
1774b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1775ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1776ba4ca20aSSatish Balay {
1777ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
177825fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1779133cdb44SSatish Balay   static int  called = 0;
17803a40ed3dSBarry Smith   int         ierr;
1781ba4ca20aSSatish Balay 
1782d64ed03dSBarry Smith   PetscFunctionBegin;
17833a40ed3dSBarry Smith   if (!a->rank) {
17843a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
178525fdafccSSatish Balay   }
178625fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1787d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1788d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
17893a40ed3dSBarry Smith   PetscFunctionReturn(0);
1790ba4ca20aSSatish Balay }
17910ac07820SSatish Balay 
17925615d1e5SSatish Balay #undef __FUNC__
1793b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1794ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1795bb5a7306SBarry Smith {
1796bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1797bb5a7306SBarry Smith   int         ierr;
1798d64ed03dSBarry Smith 
1799d64ed03dSBarry Smith   PetscFunctionBegin;
1800bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18013a40ed3dSBarry Smith   PetscFunctionReturn(0);
1802bb5a7306SBarry Smith }
1803bb5a7306SBarry Smith 
18042e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18050ac07820SSatish Balay 
18067fc3c18eSBarry Smith #undef __FUNC__
1807b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
18087fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18097fc3c18eSBarry Smith {
18107fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18117fc3c18eSBarry Smith   Mat         a,b,c,d;
18127fc3c18eSBarry Smith   PetscTruth  flg;
18137fc3c18eSBarry Smith   int         ierr;
18147fc3c18eSBarry Smith 
18157fc3c18eSBarry Smith   PetscFunctionBegin;
18167fc3c18eSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
18177fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18187fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18197fc3c18eSBarry Smith 
18207fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18217fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18227fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18237fc3c18eSBarry Smith   }
18247fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18257fc3c18eSBarry Smith   PetscFunctionReturn(0);
18267fc3c18eSBarry Smith }
18277fc3c18eSBarry Smith 
182879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1829cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1830cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1831cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1832cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1833cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1834cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
18357c922b88SBarry Smith   MatMultTranspose_MPIBAIJ,
18367c922b88SBarry Smith   MatMultTransposeAdd_MPIBAIJ,
1837cc2dc46cSBarry Smith   0,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   0,
1840cc2dc46cSBarry Smith   0,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   0,
1843cc2dc46cSBarry Smith   0,
1844cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1845cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
18467fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1847cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1848cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1849cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1850cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1851cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1852cc2dc46cSBarry Smith   0,
1853cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1854cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1855cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   0,
1859cc2dc46cSBarry Smith   0,
1860cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1861cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1862cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1863cc2dc46cSBarry Smith   0,
1864cc2dc46cSBarry Smith   0,
1865cc2dc46cSBarry Smith   0,
1866cc2dc46cSBarry Smith   0,
18672e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1868cc2dc46cSBarry Smith   0,
1869cc2dc46cSBarry Smith   0,
1870cc2dc46cSBarry Smith   0,
1871cc2dc46cSBarry Smith   0,
1872cc2dc46cSBarry Smith   0,
1873cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1874cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1875cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1876cc2dc46cSBarry Smith   0,
1877cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1878cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1879cc2dc46cSBarry Smith   0,
1880cc2dc46cSBarry Smith   0,
1881cc2dc46cSBarry Smith   0,
1882cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1883cc2dc46cSBarry Smith   0,
1884cc2dc46cSBarry Smith   0,
1885cc2dc46cSBarry Smith   0,
1886cc2dc46cSBarry Smith   0,
1887cc2dc46cSBarry Smith   0,
1888cc2dc46cSBarry Smith   0,
1889cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1890cc2dc46cSBarry Smith   0,
1891cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1892cc2dc46cSBarry Smith   0,
1893cc2dc46cSBarry Smith   0,
1894cc2dc46cSBarry Smith   0,
1895cc2dc46cSBarry Smith   MatGetMaps_Petsc};
189679bdfe76SSatish Balay 
18975ef9f2a5SBarry Smith 
1898e18c124aSSatish Balay EXTERN_C_BEGIN
18995ef9f2a5SBarry Smith #undef __FUNC__
1900b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
19015ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19025ef9f2a5SBarry Smith {
19035ef9f2a5SBarry Smith   PetscFunctionBegin;
19045ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19055ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19065ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19075ef9f2a5SBarry Smith }
1908e18c124aSSatish Balay EXTERN_C_END
190979bdfe76SSatish Balay 
19105615d1e5SSatish Balay #undef __FUNC__
1911b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
191279bdfe76SSatish Balay /*@C
191379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
191479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
191579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
191679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
191779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
191879bdfe76SSatish Balay 
1919db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1920db81eaa0SLois Curfman McInnes 
192179bdfe76SSatish Balay    Input Parameters:
1922db81eaa0SLois Curfman McInnes +  comm - MPI communicator
192379bdfe76SSatish Balay .  bs   - size of blockk
192479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
192592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
192692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
192792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
192892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
192992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1930be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1931be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
193279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
193379bdfe76SSatish Balay            submatrix  (same for all local rows)
19347fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
193592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1936db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
193792e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
193879bdfe76SSatish Balay            submatrix (same for all local rows).
19397fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
194092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
194192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
194279bdfe76SSatish Balay 
194379bdfe76SSatish Balay    Output Parameter:
194479bdfe76SSatish Balay .  A - the matrix
194579bdfe76SSatish Balay 
1946db81eaa0SLois Curfman McInnes    Options Database Keys:
1947db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1948db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1949db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1950494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1951494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19523ffaccefSLois Curfman McInnes 
1953b259b22eSLois Curfman McInnes    Notes:
195479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
195579bdfe76SSatish Balay    (possibly both).
195679bdfe76SSatish Balay 
1957be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1958be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1959be79a94dSBarry Smith 
196079bdfe76SSatish Balay    Storage Information:
196179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
196279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
196379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
196479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
196579bdfe76SSatish Balay 
196679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
196779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
196879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
196979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
197079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
197179bdfe76SSatish Balay 
197279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
197379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
197479bdfe76SSatish Balay 
1975db81eaa0SLois Curfman McInnes .vb
1976db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1977db81eaa0SLois Curfman McInnes           -------------------
1978db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1979db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1980db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1981db81eaa0SLois Curfman McInnes           -------------------
1982db81eaa0SLois Curfman McInnes .ve
198379bdfe76SSatish Balay 
198479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
198579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
198679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
198757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
198879bdfe76SSatish Balay 
1989d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1990d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
199179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
199292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
199392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19946da5968aSLois Curfman McInnes    matrices.
199579bdfe76SSatish Balay 
1996027ccd11SLois Curfman McInnes    Level: intermediate
1997027ccd11SLois Curfman McInnes 
199892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
199979bdfe76SSatish Balay 
20003eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
200179bdfe76SSatish Balay @*/
2002329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
200379bdfe76SSatish Balay {
200479bdfe76SSatish Balay   Mat          B;
200579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2006f1af5d2fSBarry Smith   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2007f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
200879bdfe76SSatish Balay 
2009d64ed03dSBarry Smith   PetscFunctionBegin;
2010962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2011962fb4a0SBarry Smith 
2012a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
201336c4a09eSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
201436c4a09eSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
20154fdb0a08SBarry Smith   if (d_nnz) {
201636c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20174fdb0a08SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
20184fdb0a08SBarry Smith     }
20194fdb0a08SBarry Smith   }
20204fdb0a08SBarry Smith   if (o_nnz) {
202136c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20224fdb0a08SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
20234fdb0a08SBarry Smith     }
20244fdb0a08SBarry Smith   }
20253914022bSBarry Smith 
2026d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2027494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2028494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2029494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20303914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20313914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20323914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
20333a40ed3dSBarry Smith     PetscFunctionReturn(0);
20343914022bSBarry Smith   }
20353914022bSBarry Smith 
203679bdfe76SSatish Balay   *A = 0;
20373f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
203879bdfe76SSatish Balay   PLogObjectCreate(B);
203979bdfe76SSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2040549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2041549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
20424c50302cSBarry Smith 
2043e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2044e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
204590f02eecSBarry Smith   B->mapping    = 0;
204679bdfe76SSatish Balay   B->factor     = 0;
204779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
204879bdfe76SSatish Balay 
2049e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2050d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2051d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
205279bdfe76SSatish Balay 
20537c922b88SBarry Smith   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2054a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2055d64ed03dSBarry Smith   }
2056a8c6a408SBarry Smith   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2057a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2058a8c6a408SBarry Smith   }
2059a8c6a408SBarry Smith   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2060a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2061a8c6a408SBarry Smith   }
2062cee3aa6bSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2063cee3aa6bSSatish Balay   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
206479bdfe76SSatish Balay 
206579bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
206679bdfe76SSatish Balay     work[0] = m; work[1] = n;
206779bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2068ca161407SBarry Smith     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
206979bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
207079bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
207179bdfe76SSatish Balay   }
207279bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
207379bdfe76SSatish Balay     Mbs = M/bs;
2074a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
207579bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
207679bdfe76SSatish Balay     m   = mbs*bs;
207779bdfe76SSatish Balay   }
207879bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
207979bdfe76SSatish Balay     Nbs = N/bs;
2080a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
208179bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
208279bdfe76SSatish Balay     n   = nbs*bs;
208379bdfe76SSatish Balay   }
2084a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2085a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2086a8c6a408SBarry Smith   }
208779bdfe76SSatish Balay 
208879bdfe76SSatish Balay   b->m = m; B->m = m;
208979bdfe76SSatish Balay   b->n = n; B->n = n;
209079bdfe76SSatish Balay   b->N = N; B->N = N;
209179bdfe76SSatish Balay   b->M = M; B->M = M;
209279bdfe76SSatish Balay   b->bs  = bs;
209379bdfe76SSatish Balay   b->bs2 = bs*bs;
209479bdfe76SSatish Balay   b->mbs = mbs;
209579bdfe76SSatish Balay   b->nbs = nbs;
209679bdfe76SSatish Balay   b->Mbs = Mbs;
209779bdfe76SSatish Balay   b->Nbs = Nbs;
209879bdfe76SSatish Balay 
2099c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2100c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2101488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2102488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2103c7fcc2eaSBarry Smith 
210479bdfe76SSatish Balay   /* build local table of row and column ownerships */
2105ff2fd236SBarry Smith   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2106ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21070ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2108ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2109ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
211079bdfe76SSatish Balay   b->rowners[0]    = 0;
211179bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
211279bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
211379bdfe76SSatish Balay   }
2114ff2fd236SBarry Smith   for (i=0; i<=b->size; i++) {
2115ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2116ff2fd236SBarry Smith   }
211779bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
211879bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
21194fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
21204fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
21214fa0d573SSatish Balay 
2122ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
212379bdfe76SSatish Balay   b->cowners[0] = 0;
212479bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
212579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
212679bdfe76SSatish Balay   }
212779bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
212879bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21294fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21304fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
213179bdfe76SSatish Balay 
2132a07cd24cSSatish Balay 
213379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2134029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
213579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
213679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2137029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
213879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
213979bdfe76SSatish Balay 
214079bdfe76SSatish Balay   /* build cache for off array entries formed */
21418798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
21428798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
21437c922b88SBarry Smith   b->donotstash  = PETSC_FALSE;
21447c922b88SBarry Smith   b->colmap      = PETSC_NULL;
21457c922b88SBarry Smith   b->garray      = PETSC_NULL;
21467c922b88SBarry Smith   b->roworiented = PETSC_TRUE;
214779bdfe76SSatish Balay 
214830793edcSSatish Balay   /* stuff used in block assembly */
214930793edcSSatish Balay   b->barray       = 0;
215030793edcSSatish Balay 
215179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
215279bdfe76SSatish Balay   b->lvec         = 0;
215379bdfe76SSatish Balay   b->Mvctx        = 0;
215479bdfe76SSatish Balay 
215579bdfe76SSatish Balay   /* stuff for MatGetRow() */
215679bdfe76SSatish Balay   b->rowindices   = 0;
215779bdfe76SSatish Balay   b->rowvalues    = 0;
215879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
215979bdfe76SSatish Balay 
2160a07cd24cSSatish Balay   /* hash table stuff */
2161a07cd24cSSatish Balay   b->ht           = 0;
2162187ce0cbSSatish Balay   b->hd           = 0;
21630bdbc534SSatish Balay   b->ht_size      = 0;
21647c922b88SBarry Smith   b->ht_flag      = PETSC_FALSE;
216525fdafccSSatish Balay   b->ht_fact      = 0;
2166187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2167187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2168a07cd24cSSatish Balay 
216979bdfe76SSatish Balay   *A = B;
2170133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2171133cdb44SSatish Balay   if (flg) {
2172133cdb44SSatish Balay     double fact = 1.39;
2173133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2174f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2175133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2176133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2177133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2178133cdb44SSatish Balay   }
2179f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
21807fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
21817fc3c18eSBarry Smith                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2182f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
21837fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
21847fc3c18eSBarry Smith                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2185f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
21865ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
21875ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
21883a40ed3dSBarry Smith   PetscFunctionReturn(0);
218979bdfe76SSatish Balay }
2190026e39d0SSatish Balay 
21915615d1e5SSatish Balay #undef __FUNC__
2192b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
21932e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21940ac07820SSatish Balay {
21950ac07820SSatish Balay   Mat         mat;
21960ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2197f1af5d2fSBarry Smith   int         ierr,len=0;
2198f1af5d2fSBarry Smith   PetscTruth  flg;
21990ac07820SSatish Balay 
2200d64ed03dSBarry Smith   PetscFunctionBegin;
22010ac07820SSatish Balay   *newmat       = 0;
22023f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
22030ac07820SSatish Balay   PLogObjectCreate(mat);
22040ac07820SSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2205549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2206e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIBAIJ;
2207e1311b90SBarry Smith   mat->ops->view    = MatView_MPIBAIJ;
22080ac07820SSatish Balay   mat->factor       = matin->factor;
22090ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2210aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
22110ac07820SSatish Balay 
22120ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
22130ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
22140ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
22150ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
22160ac07820SSatish Balay 
22170ac07820SSatish Balay   a->bs  = oldmat->bs;
22180ac07820SSatish Balay   a->bs2 = oldmat->bs2;
22190ac07820SSatish Balay   a->mbs = oldmat->mbs;
22200ac07820SSatish Balay   a->nbs = oldmat->nbs;
22210ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
22220ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
22230ac07820SSatish Balay 
22240ac07820SSatish Balay   a->rstart       = oldmat->rstart;
22250ac07820SSatish Balay   a->rend         = oldmat->rend;
22260ac07820SSatish Balay   a->cstart       = oldmat->cstart;
22270ac07820SSatish Balay   a->cend         = oldmat->cend;
22280ac07820SSatish Balay   a->size         = oldmat->size;
22290ac07820SSatish Balay   a->rank         = oldmat->rank;
2230aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2231aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2232aef5e8e0SSatish Balay   a->rowindices   = 0;
22330ac07820SSatish Balay   a->rowvalues    = 0;
22340ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
223530793edcSSatish Balay   a->barray       = 0;
22363123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
22373123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
22383123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
22393123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
22400ac07820SSatish Balay 
2241133cdb44SSatish Balay   /* hash table stuff */
2242133cdb44SSatish Balay   a->ht           = 0;
2243133cdb44SSatish Balay   a->hd           = 0;
2244133cdb44SSatish Balay   a->ht_size      = 0;
2245133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
224625fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2247133cdb44SSatish Balay   a->ht_total_ct  = 0;
2248133cdb44SSatish Balay   a->ht_insert_ct = 0;
2249133cdb44SSatish Balay 
2250133cdb44SSatish Balay 
2251ff2fd236SBarry Smith   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2252ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22530ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2254ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2255549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
22568798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
22578798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
22580ac07820SSatish Balay   if (oldmat->colmap) {
2259aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
22600f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
226148e59246SSatish Balay #else
22620ac07820SSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22630ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2264549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
226548e59246SSatish Balay #endif
22660ac07820SSatish Balay   } else a->colmap = 0;
22670ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
22680ac07820SSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
22690ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2270549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
22710ac07820SSatish Balay   } else a->garray = 0;
22720ac07820SSatish Balay 
22730ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
22740ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22750ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2276e18c124aSSatish Balay 
22770ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22782e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
22790ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22802e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
22810ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22820ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2283e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
22840ac07820SSatish Balay   if (flg) {
22850ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
22860ac07820SSatish Balay   }
22870ac07820SSatish Balay   *newmat = mat;
22883a40ed3dSBarry Smith   PetscFunctionReturn(0);
22890ac07820SSatish Balay }
229057b952d6SSatish Balay 
229157b952d6SSatish Balay #include "sys.h"
229257b952d6SSatish Balay 
22935615d1e5SSatish Balay #undef __FUNC__
2294b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
229557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
229657b952d6SSatish Balay {
229757b952d6SSatish Balay   Mat          A;
229857b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
229957b952d6SSatish Balay   Scalar       *vals,*buf;
230057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
230157b952d6SSatish Balay   MPI_Status   status;
2302cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
230357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2304f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
230557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
230657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
230757b952d6SSatish Balay 
2308d64ed03dSBarry Smith   PetscFunctionBegin;
2309f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
231057b952d6SSatish Balay 
2311d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2312d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
231357b952d6SSatish Balay   if (!rank) {
231457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2315e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2316a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2317d64ed03dSBarry Smith     if (header[3] < 0) {
2318a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2319d64ed03dSBarry Smith     }
23206c5fab8fSBarry Smith   }
2321d64ed03dSBarry Smith 
2322ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
232357b952d6SSatish Balay   M = header[1]; N = header[2];
232457b952d6SSatish Balay 
2325a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
232657b952d6SSatish Balay 
232757b952d6SSatish Balay   /*
232857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
232957b952d6SSatish Balay      divisible by the blocksize
233057b952d6SSatish Balay   */
233157b952d6SSatish Balay   Mbs        = M/bs;
233257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
233357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
233457b952d6SSatish Balay   else                  Mbs++;
233557b952d6SSatish Balay   if (extra_rows &&!rank) {
2336b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
233757b952d6SSatish Balay   }
2338537820f0SBarry Smith 
233957b952d6SSatish Balay   /* determine ownership of all rows */
234057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
234157b952d6SSatish Balay   m   = mbs * bs;
2342cee3aa6bSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2343cee3aa6bSSatish Balay   browners = rowners + size + 1;
2344ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
234557b952d6SSatish Balay   rowners[0] = 0;
2346cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2347cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
234857b952d6SSatish Balay   rstart = rowners[rank];
234957b952d6SSatish Balay   rend   = rowners[rank+1];
235057b952d6SSatish Balay 
235157b952d6SSatish Balay   /* distribute row lengths to all processors */
235257b952d6SSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
235357b952d6SSatish Balay   if (!rank) {
235457b952d6SSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2355e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
235657b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
235757b952d6SSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2358cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2359ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2360606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2361d64ed03dSBarry Smith   } else {
2362ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
236357b952d6SSatish Balay   }
236457b952d6SSatish Balay 
236557b952d6SSatish Balay   if (!rank) {
236657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
236757b952d6SSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2368549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
236957b952d6SSatish Balay     for (i=0; i<size; i++) {
237057b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
237157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
237257b952d6SSatish Balay       }
237357b952d6SSatish Balay     }
2374606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
237557b952d6SSatish Balay 
237657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
237757b952d6SSatish Balay     maxnz = 0;
237857b952d6SSatish Balay     for (i=0; i<size; i++) {
237957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
238057b952d6SSatish Balay     }
238157b952d6SSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
238257b952d6SSatish Balay 
238357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
238457b952d6SSatish Balay     nz = procsnz[0];
238557b952d6SSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
238657b952d6SSatish Balay     mycols = ibuf;
2387cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2388e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2389cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2390cee3aa6bSSatish Balay 
239157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
239257b952d6SSatish Balay     for (i=1; i<size-1; i++) {
239357b952d6SSatish Balay       nz   = procsnz[i];
2394e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2395ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
239657b952d6SSatish Balay     }
239757b952d6SSatish Balay     /* read in the stuff for the last proc */
239857b952d6SSatish Balay     if (size != 1) {
239957b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2400e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
240157b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2402ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
240357b952d6SSatish Balay     }
2404606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2405d64ed03dSBarry Smith   } else {
240657b952d6SSatish Balay     /* determine buffer space needed for message */
240757b952d6SSatish Balay     nz = 0;
240857b952d6SSatish Balay     for (i=0; i<m; i++) {
240957b952d6SSatish Balay       nz += locrowlens[i];
241057b952d6SSatish Balay     }
241157b952d6SSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
241257b952d6SSatish Balay     mycols = ibuf;
241357b952d6SSatish Balay     /* receive message of column indices*/
2414ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2415ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2416a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
241757b952d6SSatish Balay   }
241857b952d6SSatish Balay 
241957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2420cee3aa6bSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2421cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
242257b952d6SSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2423549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
242457b952d6SSatish Balay   masked1 = mask    + Mbs;
242557b952d6SSatish Balay   masked2 = masked1 + Mbs;
242657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
242757b952d6SSatish Balay   for (i=0; i<mbs; i++) {
242857b952d6SSatish Balay     dcount  = 0;
242957b952d6SSatish Balay     odcount = 0;
243057b952d6SSatish Balay     for (j=0; j<bs; j++) {
243157b952d6SSatish Balay       kmax = locrowlens[rowcount];
243257b952d6SSatish Balay       for (k=0; k<kmax; k++) {
243357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
243457b952d6SSatish Balay         if (!mask[tmp]) {
243557b952d6SSatish Balay           mask[tmp] = 1;
243657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
243757b952d6SSatish Balay           else masked1[dcount++] = tmp;
243857b952d6SSatish Balay         }
243957b952d6SSatish Balay       }
244057b952d6SSatish Balay       rowcount++;
244157b952d6SSatish Balay     }
2442cee3aa6bSSatish Balay 
244357b952d6SSatish Balay     dlens[i]  = dcount;
244457b952d6SSatish Balay     odlens[i] = odcount;
2445cee3aa6bSSatish Balay 
244657b952d6SSatish Balay     /* zero out the mask elements we set */
244757b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
244857b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
244957b952d6SSatish Balay   }
2450cee3aa6bSSatish Balay 
245157b952d6SSatish Balay   /* create our matrix */
2452549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
245357b952d6SSatish Balay   A = *newmat;
24546d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
245557b952d6SSatish Balay 
245657b952d6SSatish Balay   if (!rank) {
245757b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
245857b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
245957b952d6SSatish Balay     nz = procsnz[0];
246057b952d6SSatish Balay     vals = buf;
2461cee3aa6bSSatish Balay     mycols = ibuf;
2462cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2463e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2464cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2465537820f0SBarry Smith 
246657b952d6SSatish Balay     /* insert into matrix */
246757b952d6SSatish Balay     jj      = rstart*bs;
246857b952d6SSatish Balay     for (i=0; i<m; i++) {
24693eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
247057b952d6SSatish Balay       mycols += locrowlens[i];
247157b952d6SSatish Balay       vals   += locrowlens[i];
247257b952d6SSatish Balay       jj++;
247357b952d6SSatish Balay     }
247457b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
247557b952d6SSatish Balay     for (i=1; i<size-1; i++) {
247657b952d6SSatish Balay       nz   = procsnz[i];
247757b952d6SSatish Balay       vals = buf;
2478e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2479ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
248057b952d6SSatish Balay     }
248157b952d6SSatish Balay     /* the last proc */
248257b952d6SSatish Balay     if (size != 1){
248357b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2484cee3aa6bSSatish Balay       vals = buf;
2485e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
248657b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2487ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
248857b952d6SSatish Balay     }
2489606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2490d64ed03dSBarry Smith   } else {
249157b952d6SSatish Balay     /* receive numeric values */
249257b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
249357b952d6SSatish Balay 
249457b952d6SSatish Balay     /* receive message of values*/
249557b952d6SSatish Balay     vals   = buf;
2496cee3aa6bSSatish Balay     mycols = ibuf;
2497ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2498ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2499a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
250057b952d6SSatish Balay 
250157b952d6SSatish Balay     /* insert into matrix */
250257b952d6SSatish Balay     jj      = rstart*bs;
2503cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
25043eda8832SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
250557b952d6SSatish Balay       mycols += locrowlens[i];
250657b952d6SSatish Balay       vals   += locrowlens[i];
250757b952d6SSatish Balay       jj++;
250857b952d6SSatish Balay     }
250957b952d6SSatish Balay   }
2510606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2511606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2512606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2513606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2514606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2515606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
25166d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25176d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25183a40ed3dSBarry Smith   PetscFunctionReturn(0);
251957b952d6SSatish Balay }
252057b952d6SSatish Balay 
2521133cdb44SSatish Balay #undef __FUNC__
2522b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2523133cdb44SSatish Balay /*@
2524133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2525133cdb44SSatish Balay 
2526133cdb44SSatish Balay    Input Parameters:
2527133cdb44SSatish Balay .  mat  - the matrix
2528133cdb44SSatish Balay .  fact - factor
2529133cdb44SSatish Balay 
2530fee21e36SBarry Smith    Collective on Mat
2531fee21e36SBarry Smith 
25328c890885SBarry Smith    Level: advanced
25338c890885SBarry Smith 
2534133cdb44SSatish Balay   Notes:
2535133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2536133cdb44SSatish Balay 
2537133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2538133cdb44SSatish Balay 
2539133cdb44SSatish Balay .seealso: MatSetOption()
2540133cdb44SSatish Balay @*/
2541329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2542133cdb44SSatish Balay {
254325fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2544133cdb44SSatish Balay 
2545133cdb44SSatish Balay   PetscFunctionBegin;
2546133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
254725fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2548133cdb44SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2549133cdb44SSatish Balay   }
2550133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2551133cdb44SSatish Balay   baij->ht_fact = fact;
2552133cdb44SSatish Balay   PetscFunctionReturn(0);
2553133cdb44SSatish Balay }
2554