xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7843d17afa8c1ec0c9a03b3c2ea6d3e3db42e252)
1*7843d17aSBarry Smith /*$Id: mpibaij.c,v 1.203 2000/09/28 21:11:32 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat);
8ca44d042SBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9ca44d042SBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10ca44d042SBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11ca44d042SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13ca44d042SBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14ca44d042SBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16ca44d042SBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);
1793fea6afSBarry Smith 
1893fea6afSBarry Smith /*  UGLY, ugly, ugly
1993fea6afSBarry Smith    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
2093fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2193fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2293fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2393fea6afSBarry Smith    into the single precision data structures.
2493fea6afSBarry Smith */
2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
26ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
3193fea6afSBarry Smith #else
326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3793fea6afSBarry Smith #endif
383b2fbd54SBarry Smith 
39*7843d17aSBarry Smith #undef __FUNC__
40*7843d17aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPIBAIJ"
41*7843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42*7843d17aSBarry Smith {
43*7843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44*7843d17aSBarry Smith   int          ierr,i;
45*7843d17aSBarry Smith   PetscReal    *va,*vb;
46*7843d17aSBarry Smith   Vec          vtmp;
47*7843d17aSBarry Smith 
48*7843d17aSBarry Smith   PetscFunctionBegin;
49*7843d17aSBarry Smith 
50*7843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51*7843d17aSBarry Smith   ierr = VecGetArry(v,&va);CHKERRQ(ierr);
52*7843d17aSBarry Smith 
53*7843d17aSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,a->m,&vtmp);CHKERRA(ierr);
54*7843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55*7843d17aSBarry Smith   ierr = VecGetArry(vtmp,&vb);CHKERRQ(ierr);
56*7843d17aSBarry Smith 
57*7843d17aSBarry Smith   for (i=0; i<a->m; i++){
58*7843d17aSBarry Smith     if (va[i] < vb[i]) va[i] = vb[i];
59*7843d17aSBarry Smith   }
60*7843d17aSBarry Smith 
61*7843d17aSBarry Smith   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62*7843d17aSBarry Smith   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63*7843d17aSBarry Smith   ierr = VecDestroy(vtmp);CHKERRA(ierr);
64*7843d17aSBarry Smith 
65*7843d17aSBarry Smith   PetscFunctionReturn(0);
66*7843d17aSBarry Smith }
67*7843d17aSBarry Smith 
687fc3c18eSBarry Smith EXTERN_C_BEGIN
697fc3c18eSBarry Smith #undef __FUNC__
706fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
717fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
727fc3c18eSBarry Smith {
737fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
747fc3c18eSBarry Smith   int         ierr;
757fc3c18eSBarry Smith 
767fc3c18eSBarry Smith   PetscFunctionBegin;
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
787fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
797fc3c18eSBarry Smith   PetscFunctionReturn(0);
807fc3c18eSBarry Smith }
817fc3c18eSBarry Smith EXTERN_C_END
827fc3c18eSBarry Smith 
837fc3c18eSBarry Smith EXTERN_C_BEGIN
847fc3c18eSBarry Smith #undef __FUNC__
856fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
867fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
877fc3c18eSBarry Smith {
887fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
897fc3c18eSBarry Smith   int         ierr;
907fc3c18eSBarry Smith 
917fc3c18eSBarry Smith   PetscFunctionBegin;
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
937fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
947fc3c18eSBarry Smith   PetscFunctionReturn(0);
957fc3c18eSBarry Smith }
967fc3c18eSBarry Smith EXTERN_C_END
977fc3c18eSBarry Smith 
98537820f0SBarry Smith /*
99537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10157b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10257b952d6SSatish Balay    length of colmap equals the global matrix length.
10357b952d6SSatish Balay */
1045615d1e5SSatish Balay #undef __FUNC__
1056fa18ffdSBarry Smith #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private"
10657b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
10757b952d6SSatish Balay {
10857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10957b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
11157b952d6SSatish Balay 
112d64ed03dSBarry Smith   PetscFunctionBegin;
113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
114f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11548e59246SSatish Balay   for (i=0; i<nbs; i++){
1160f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11748e59246SSatish Balay   }
11848e59246SSatish Balay #else
119758f045eSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
12057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
121549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12348e59246SSatish Balay #endif
1243a40ed3dSBarry Smith   PetscFunctionReturn(0);
12557b952d6SSatish Balay }
12657b952d6SSatish Balay 
12780c1aa95SSatish Balay #define CHUNKSIZE  10
12880c1aa95SSatish Balay 
129f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13080c1aa95SSatish Balay { \
13180c1aa95SSatish Balay  \
13280c1aa95SSatish Balay     brow = row/bs;  \
13380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13580c1aa95SSatish Balay       bcol = col/bs; \
13680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
137ab26458aSBarry Smith       low = 0; high = nrow; \
138ab26458aSBarry Smith       while (high-low > 3) { \
139ab26458aSBarry Smith         t = (low+high)/2; \
140ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
141ab26458aSBarry Smith         else              low  = t; \
142ab26458aSBarry Smith       } \
143ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
148eada6651SSatish Balay           else                    *bap  = value;  \
149ac7a638eSSatish Balay           goto a_noinsert; \
15080c1aa95SSatish Balay         } \
15180c1aa95SSatish Balay       } \
15289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
15329bbc08cSBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
15480c1aa95SSatish Balay       if (nrow >= rmax) { \
15580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15680c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1573eda8832SBarry Smith         MatScalar *new_a; \
15880c1aa95SSatish Balay  \
15929bbc08cSBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
16089280ab3SLois Curfman McInnes  \
16180c1aa95SSatish Balay         /* malloc new storage space */ \
1623eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
1633eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
16480c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16680c1aa95SSatish Balay  \
16780c1aa95SSatish Balay         /* copy over old data into new slots */ \
16880c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16980c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
17180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1723eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1733eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
175549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1763eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17780c1aa95SSatish Balay         /* free up old matrix storage */ \
178606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179606d414cSSatish Balay         if (!a->singlemalloc) { \
180606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182606d414cSSatish Balay         } \
18380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1847c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18580c1aa95SSatish Balay  \
18680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
1883eda8832SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19080c1aa95SSatish Balay         a->reallocs++; \
19180c1aa95SSatish Balay         a->nz++; \
19280c1aa95SSatish Balay       } \
19380c1aa95SSatish Balay       N = nrow++ - 1;  \
19480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19580c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1973eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19880c1aa95SSatish Balay       } \
1993eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
202ac7a638eSSatish Balay       a_noinsert:; \
20380c1aa95SSatish Balay     ailen[brow] = nrow; \
20480c1aa95SSatish Balay }
20557b952d6SSatish Balay 
206ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207ac7a638eSSatish Balay { \
208ac7a638eSSatish Balay     brow = row/bs;  \
209ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
211ac7a638eSSatish Balay       bcol = col/bs; \
212ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
213ac7a638eSSatish Balay       low = 0; high = nrow; \
214ac7a638eSSatish Balay       while (high-low > 3) { \
215ac7a638eSSatish Balay         t = (low+high)/2; \
216ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
217ac7a638eSSatish Balay         else              low  = t; \
218ac7a638eSSatish Balay       } \
219ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
220ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
221ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
222ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
224ac7a638eSSatish Balay           else                    *bap  = value;  \
225ac7a638eSSatish Balay           goto b_noinsert; \
226ac7a638eSSatish Balay         } \
227ac7a638eSSatish Balay       } \
22889280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
22929bbc08cSBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
230ac7a638eSSatish Balay       if (nrow >= rmax) { \
231ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23274c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2333eda8832SBarry Smith         MatScalar *new_a; \
234ac7a638eSSatish Balay  \
23529bbc08cSBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
23689280ab3SLois Curfman McInnes  \
237ac7a638eSSatish Balay         /* malloc new storage space */ \
2383eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
2393eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
240ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
241ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
242ac7a638eSSatish Balay  \
243ac7a638eSSatish Balay         /* copy over old data into new slots */ \
244ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24574c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2483eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2523eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253ac7a638eSSatish Balay         /* free up old matrix storage */ \
254606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255606d414cSSatish Balay         if (!b->singlemalloc) { \
256606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258606d414cSSatish Balay         } \
25974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2607c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
261ac7a638eSSatish Balay  \
262ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
2643eda8832SBarry Smith         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26674c639caSSatish Balay         b->reallocs++; \
26774c639caSSatish Balay         b->nz++; \
268ac7a638eSSatish Balay       } \
269ac7a638eSSatish Balay       N = nrow++ - 1;  \
270ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
271ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
272ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2733eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274ac7a638eSSatish Balay       } \
2753eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
277ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
278ac7a638eSSatish Balay       b_noinsert:; \
279ac7a638eSSatish Balay     bilen[brow] = nrow; \
280ac7a638eSSatish Balay }
281ac7a638eSSatish Balay 
28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2835615d1e5SSatish Balay #undef __FUNC__
2846fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
285ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
28657b952d6SSatish Balay {
2876fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
28893fea6afSBarry Smith   int         ierr,i,N = m*n;
2896fa18ffdSBarry Smith   MatScalar   *vsingle;
29093fea6afSBarry Smith 
29193fea6afSBarry Smith   PetscFunctionBegin;
2926fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2936fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
2946fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
2956fa18ffdSBarry Smith     b->setvalueslen  = N;
2966fa18ffdSBarry Smith   }
2976fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2986fa18ffdSBarry Smith 
29993fea6afSBarry Smith   for (i=0; i<N; i++) {
30093fea6afSBarry Smith     vsingle[i] = v[i];
30193fea6afSBarry Smith   }
30293fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30393fea6afSBarry Smith   PetscFunctionReturn(0);
30493fea6afSBarry Smith }
30593fea6afSBarry Smith 
30693fea6afSBarry Smith #undef __FUNC__
3076fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
30893fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
30993fea6afSBarry Smith {
3106fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3116fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3126fa18ffdSBarry Smith   MatScalar   *vsingle;
31393fea6afSBarry Smith 
31493fea6afSBarry Smith   PetscFunctionBegin;
3156fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3166fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
3176fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
3186fa18ffdSBarry Smith     b->setvalueslen  = N;
3196fa18ffdSBarry Smith   }
3206fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32193fea6afSBarry Smith   for (i=0; i<N; i++) {
32293fea6afSBarry Smith     vsingle[i] = v[i];
32393fea6afSBarry Smith   }
32493fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32593fea6afSBarry Smith   PetscFunctionReturn(0);
32693fea6afSBarry Smith }
3276fa18ffdSBarry Smith 
3286fa18ffdSBarry Smith #undef __FUNC__
3296fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT"
3306fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3316fa18ffdSBarry Smith {
3326fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3336fa18ffdSBarry Smith   int         ierr,i,N = m*n;
3346fa18ffdSBarry Smith   MatScalar   *vsingle;
3356fa18ffdSBarry Smith 
3366fa18ffdSBarry Smith   PetscFunctionBegin;
3376fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3386fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
3396fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
3406fa18ffdSBarry Smith     b->setvalueslen  = N;
3416fa18ffdSBarry Smith   }
3426fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3436fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3446fa18ffdSBarry Smith     vsingle[i] = v[i];
3456fa18ffdSBarry Smith   }
3466fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3476fa18ffdSBarry Smith   PetscFunctionReturn(0);
3486fa18ffdSBarry Smith }
3496fa18ffdSBarry Smith 
3506fa18ffdSBarry Smith #undef __FUNC__
3516fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
3526fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3536fa18ffdSBarry Smith {
3546fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3556fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3566fa18ffdSBarry Smith   MatScalar   *vsingle;
3576fa18ffdSBarry Smith 
3586fa18ffdSBarry Smith   PetscFunctionBegin;
3596fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3606fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
3616fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
3626fa18ffdSBarry Smith     b->setvalueslen  = N;
3636fa18ffdSBarry Smith   }
3646fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3656fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3666fa18ffdSBarry Smith     vsingle[i] = v[i];
3676fa18ffdSBarry Smith   }
3686fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3696fa18ffdSBarry Smith   PetscFunctionReturn(0);
3706fa18ffdSBarry Smith }
37193fea6afSBarry Smith #endif
37293fea6afSBarry Smith 
37393fea6afSBarry Smith #undef __FUNC__
3746fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
37593fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
37693fea6afSBarry Smith {
37757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
37893fea6afSBarry Smith   MatScalar   value;
3794fa0d573SSatish Balay   int         ierr,i,j,row,col;
3804fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
3814fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3824fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38357b952d6SSatish Balay 
384eada6651SSatish Balay   /* Some Variables required in the macro */
38580c1aa95SSatish Balay   Mat         A = baij->A;
38680c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
387ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3883eda8832SBarry Smith   MatScalar   *aa=a->a;
389ac7a638eSSatish Balay 
390ac7a638eSSatish Balay   Mat         B = baij->B;
391ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
392ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3933eda8832SBarry Smith   MatScalar   *ba=b->a;
394ac7a638eSSatish Balay 
395ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
396ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3973eda8832SBarry Smith   MatScalar   *ap,*bap;
39880c1aa95SSatish Balay 
399d64ed03dSBarry Smith   PetscFunctionBegin;
40057b952d6SSatish Balay   for (i=0; i<m; i++) {
4015ef9f2a5SBarry Smith     if (im[i] < 0) continue;
402aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
40329bbc08cSBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
404639f9d9dSBarry Smith #endif
40557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
40657b952d6SSatish Balay       row = im[i] - rstart_orig;
40757b952d6SSatish Balay       for (j=0; j<n; j++) {
40857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
40957b952d6SSatish Balay           col = in[j] - cstart_orig;
41057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41280c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41373959e64SBarry Smith         } else if (in[j] < 0) continue;
414aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
41529bbc08cSBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
416639f9d9dSBarry Smith #endif
41757b952d6SSatish Balay         else {
41857b952d6SSatish Balay           if (mat->was_assembled) {
419905e6a2fSBarry Smith             if (!baij->colmap) {
420905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421905e6a2fSBarry Smith             }
422aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4230f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
424fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
42548e59246SSatish Balay #else
426905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
42748e59246SSatish Balay #endif
42857b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
42957b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
43057b952d6SSatish Balay               col =  in[j];
4319bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4329bf004c3SSatish Balay               B = baij->B;
4339bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4349bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4359bf004c3SSatish Balay               ba=b->a;
43657b952d6SSatish Balay             }
437d64ed03dSBarry Smith           } else col = in[j];
43857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
439ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
440ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44157b952d6SSatish Balay         }
44257b952d6SSatish Balay       }
443d64ed03dSBarry Smith     } else {
44490f02eecSBarry Smith       if (!baij->donotstash) {
445ff2fd236SBarry Smith         if (roworiented) {
4466fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447ff2fd236SBarry Smith         } else {
4486fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
44957b952d6SSatish Balay         }
45057b952d6SSatish Balay       }
45157b952d6SSatish Balay     }
45290f02eecSBarry Smith   }
4533a40ed3dSBarry Smith   PetscFunctionReturn(0);
45457b952d6SSatish Balay }
45557b952d6SSatish Balay 
456ab26458aSBarry Smith #undef __FUNC__
4576fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
45893fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
459ab26458aSBarry Smith {
460ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
46193fea6afSBarry Smith   MatScalar   *value,*barray=baij->barray;
4627ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
463ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
464ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
465ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
466ab26458aSBarry Smith 
467b16ae2b1SBarry Smith   PetscFunctionBegin;
46830793edcSSatish Balay   if(!barray) {
46993fea6afSBarry Smith     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
47030793edcSSatish Balay   }
47130793edcSSatish Balay 
472ab26458aSBarry Smith   if (roworiented) {
473ab26458aSBarry Smith     stepval = (n-1)*bs;
474ab26458aSBarry Smith   } else {
475ab26458aSBarry Smith     stepval = (m-1)*bs;
476ab26458aSBarry Smith   }
477ab26458aSBarry Smith   for (i=0; i<m; i++) {
4785ef9f2a5SBarry Smith     if (im[i] < 0) continue;
479aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
48029bbc08cSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
481ab26458aSBarry Smith #endif
482ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
483ab26458aSBarry Smith       row = im[i] - rstart;
484ab26458aSBarry Smith       for (j=0; j<n; j++) {
48515b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
48615b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
48715b57d14SSatish Balay           barray = v + i*bs2;
48815b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
48915b57d14SSatish Balay           barray = v + j*bs2;
49015b57d14SSatish Balay         } else { /* Here a copy is required */
491ab26458aSBarry Smith           if (roworiented) {
492ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
493ab26458aSBarry Smith           } else {
494ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
495abef11f7SSatish Balay           }
49647513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
49747513183SBarry Smith             for (jj=0; jj<bs; jj++) {
49830793edcSSatish Balay               *barray++  = *value++;
49947513183SBarry Smith             }
50047513183SBarry Smith           }
50130793edcSSatish Balay           barray -=bs2;
50215b57d14SSatish Balay         }
503abef11f7SSatish Balay 
504abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
505abef11f7SSatish Balay           col  = in[j] - cstart;
50693fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
507ab26458aSBarry Smith         }
5085ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
509aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
51029bbc08cSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
511ab26458aSBarry Smith #endif
512ab26458aSBarry Smith         else {
513ab26458aSBarry Smith           if (mat->was_assembled) {
514ab26458aSBarry Smith             if (!baij->colmap) {
515ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
516ab26458aSBarry Smith             }
517a5eb4965SSatish Balay 
518aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
519aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
520fa46199cSSatish Balay             { int data;
5210f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
52229bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
523fa46199cSSatish Balay             }
52448e59246SSatish Balay #else
52529bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
526a5eb4965SSatish Balay #endif
52748e59246SSatish Balay #endif
528aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5290f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
530fa46199cSSatish Balay             col  = (col - 1)/bs;
53148e59246SSatish Balay #else
532a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
53348e59246SSatish Balay #endif
534ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
535ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
536ab26458aSBarry Smith               col =  in[j];
537ab26458aSBarry Smith             }
538ab26458aSBarry Smith           }
539ab26458aSBarry Smith           else col = in[j];
54093fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
541ab26458aSBarry Smith         }
542ab26458aSBarry Smith       }
543d64ed03dSBarry Smith     } else {
544ab26458aSBarry Smith       if (!baij->donotstash) {
545ff2fd236SBarry Smith         if (roworiented) {
5466fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
547ff2fd236SBarry Smith         } else {
5486fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549ff2fd236SBarry Smith         }
550abef11f7SSatish Balay       }
551ab26458aSBarry Smith     }
552ab26458aSBarry Smith   }
5533a40ed3dSBarry Smith   PetscFunctionReturn(0);
554ab26458aSBarry Smith }
5556fa18ffdSBarry Smith 
5560bdbc534SSatish Balay #define HASH_KEY 0.6180339887
557c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5586fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
559c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5605615d1e5SSatish Balay #undef __FUNC__
5616fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
5626fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
5630bdbc534SSatish Balay {
5640bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
5650bdbc534SSatish Balay   int         ierr,i,j,row,col;
5660bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
567c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
568c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
569329f5518SBarry Smith   PetscReal   tmp;
5703eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
571aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5724a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5734a15367fSSatish Balay #endif
5740bdbc534SSatish Balay 
5750bdbc534SSatish Balay   PetscFunctionBegin;
5760bdbc534SSatish Balay 
5770bdbc534SSatish Balay   for (i=0; i<m; i++) {
578aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
57929bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
58029bbc08cSBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
5810bdbc534SSatish Balay #endif
5820bdbc534SSatish Balay       row = im[i];
583c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5840bdbc534SSatish Balay       for (j=0; j<n; j++) {
5850bdbc534SSatish Balay         col = in[j];
5866fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5870bdbc534SSatish Balay         /* Look up into the Hash Table */
588c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
589c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5900bdbc534SSatish Balay 
591c2760754SSatish Balay 
592c2760754SSatish Balay         idx = h1;
593aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
594187ce0cbSSatish Balay         insert_ct++;
595187ce0cbSSatish Balay         total_ct++;
596187ce0cbSSatish Balay         if (HT[idx] != key) {
597187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
598187ce0cbSSatish Balay           if (idx == size) {
599187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
600187ce0cbSSatish Balay             if (idx == h1) {
60129bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
602187ce0cbSSatish Balay             }
603187ce0cbSSatish Balay           }
604187ce0cbSSatish Balay         }
605187ce0cbSSatish Balay #else
606c2760754SSatish Balay         if (HT[idx] != key) {
607c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
608c2760754SSatish Balay           if (idx == size) {
609c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
610c2760754SSatish Balay             if (idx == h1) {
61129bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
612c2760754SSatish Balay             }
613c2760754SSatish Balay           }
614c2760754SSatish Balay         }
615187ce0cbSSatish Balay #endif
616c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
617c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
618c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6190bdbc534SSatish Balay       }
6200bdbc534SSatish Balay     } else {
6210bdbc534SSatish Balay       if (!baij->donotstash) {
622ff2fd236SBarry Smith         if (roworiented) {
6238798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
624ff2fd236SBarry Smith         } else {
6258798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6260bdbc534SSatish Balay         }
6270bdbc534SSatish Balay       }
6280bdbc534SSatish Balay     }
6290bdbc534SSatish Balay   }
630aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
631187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
632187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
633187ce0cbSSatish Balay #endif
6340bdbc534SSatish Balay   PetscFunctionReturn(0);
6350bdbc534SSatish Balay }
6360bdbc534SSatish Balay 
6370bdbc534SSatish Balay #undef __FUNC__
6386fa18ffdSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
6396fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
6400bdbc534SSatish Balay {
6410bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
6428798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
6430bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
644b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
645c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
646329f5518SBarry Smith   PetscReal   tmp;
6473eda8832SBarry Smith   MatScalar   **HD = baij->hd,*baij_a;
6486fa18ffdSBarry Smith   MatScalar   *v_t,*value;
649aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6504a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6514a15367fSSatish Balay #endif
6520bdbc534SSatish Balay 
653d0a41580SSatish Balay   PetscFunctionBegin;
654d0a41580SSatish Balay 
6550bdbc534SSatish Balay   if (roworiented) {
6560bdbc534SSatish Balay     stepval = (n-1)*bs;
6570bdbc534SSatish Balay   } else {
6580bdbc534SSatish Balay     stepval = (m-1)*bs;
6590bdbc534SSatish Balay   }
6600bdbc534SSatish Balay   for (i=0; i<m; i++) {
661aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
66229bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
66329bbc08cSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
6640bdbc534SSatish Balay #endif
6650bdbc534SSatish Balay     row   = im[i];
666187ce0cbSSatish Balay     v_t   = v + i*bs2;
667c2760754SSatish Balay     if (row >= rstart && row < rend) {
6680bdbc534SSatish Balay       for (j=0; j<n; j++) {
6690bdbc534SSatish Balay         col = in[j];
6700bdbc534SSatish Balay 
6710bdbc534SSatish Balay         /* Look up into the Hash Table */
672c2760754SSatish Balay         key = row*Nbs+col+1;
673c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6740bdbc534SSatish Balay 
675c2760754SSatish Balay         idx = h1;
676aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
677187ce0cbSSatish Balay         total_ct++;
678187ce0cbSSatish Balay         insert_ct++;
679187ce0cbSSatish Balay        if (HT[idx] != key) {
680187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
681187ce0cbSSatish Balay           if (idx == size) {
682187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
683187ce0cbSSatish Balay             if (idx == h1) {
68429bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
685187ce0cbSSatish Balay             }
686187ce0cbSSatish Balay           }
687187ce0cbSSatish Balay         }
688187ce0cbSSatish Balay #else
689c2760754SSatish Balay         if (HT[idx] != key) {
690c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
691c2760754SSatish Balay           if (idx == size) {
692c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
693c2760754SSatish Balay             if (idx == h1) {
69429bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
695c2760754SSatish Balay             }
696c2760754SSatish Balay           }
697c2760754SSatish Balay         }
698187ce0cbSSatish Balay #endif
699c2760754SSatish Balay         baij_a = HD[idx];
7000bdbc534SSatish Balay         if (roworiented) {
701c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
702187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
703187ce0cbSSatish Balay           value = v_t;
704187ce0cbSSatish Balay           v_t  += bs;
705fef45726SSatish Balay           if (addv == ADD_VALUES) {
706c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
707c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
708fef45726SSatish Balay                 baij_a[jj]  += *value++;
709b4cc0f5aSSatish Balay               }
710b4cc0f5aSSatish Balay             }
711fef45726SSatish Balay           } else {
712c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
713c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
714fef45726SSatish Balay                 baij_a[jj]  = *value++;
715fef45726SSatish Balay               }
716fef45726SSatish Balay             }
717fef45726SSatish Balay           }
7180bdbc534SSatish Balay         } else {
7190bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
720fef45726SSatish Balay           if (addv == ADD_VALUES) {
721b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7220bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
723fef45726SSatish Balay                 baij_a[jj]  += *value++;
724fef45726SSatish Balay               }
725fef45726SSatish Balay             }
726fef45726SSatish Balay           } else {
727fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
728fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
729fef45726SSatish Balay                 baij_a[jj]  = *value++;
730fef45726SSatish Balay               }
731b4cc0f5aSSatish Balay             }
7320bdbc534SSatish Balay           }
7330bdbc534SSatish Balay         }
7340bdbc534SSatish Balay       }
7350bdbc534SSatish Balay     } else {
7360bdbc534SSatish Balay       if (!baij->donotstash) {
7370bdbc534SSatish Balay         if (roworiented) {
7388798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7390bdbc534SSatish Balay         } else {
7408798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7410bdbc534SSatish Balay         }
7420bdbc534SSatish Balay       }
7430bdbc534SSatish Balay     }
7440bdbc534SSatish Balay   }
745aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
746187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
747187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
748187ce0cbSSatish Balay #endif
7490bdbc534SSatish Balay   PetscFunctionReturn(0);
7500bdbc534SSatish Balay }
751133cdb44SSatish Balay 
7520bdbc534SSatish Balay #undef __FUNC__
753b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
754ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
755d6de1c52SSatish Balay {
756d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
757d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
75848e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
759d6de1c52SSatish Balay 
760133cdb44SSatish Balay   PetscFunctionBegin;
761d6de1c52SSatish Balay   for (i=0; i<m; i++) {
76229bbc08cSBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
76329bbc08cSBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
764d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
765d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
766d6de1c52SSatish Balay       for (j=0; j<n; j++) {
76729bbc08cSBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
76829bbc08cSBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
769d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
770d6de1c52SSatish Balay           col = idxn[j] - bscstart;
77198dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
772d64ed03dSBarry Smith         } else {
773905e6a2fSBarry Smith           if (!baij->colmap) {
774905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
775905e6a2fSBarry Smith           }
776aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7770f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
778fa46199cSSatish Balay           data --;
77948e59246SSatish Balay #else
78048e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
78148e59246SSatish Balay #endif
78248e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
783d9d09a02SSatish Balay           else {
78448e59246SSatish Balay             col  = data + idxn[j]%bs;
78598dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786d6de1c52SSatish Balay           }
787d6de1c52SSatish Balay         }
788d6de1c52SSatish Balay       }
789d64ed03dSBarry Smith     } else {
79029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
791d6de1c52SSatish Balay     }
792d6de1c52SSatish Balay   }
7933a40ed3dSBarry Smith  PetscFunctionReturn(0);
794d6de1c52SSatish Balay }
795d6de1c52SSatish Balay 
7965615d1e5SSatish Balay #undef __FUNC__
797b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
798329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
799d6de1c52SSatish Balay {
800d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
801d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
802acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
803329f5518SBarry Smith   PetscReal  sum = 0.0;
8043eda8832SBarry Smith   MatScalar  *v;
805d6de1c52SSatish Balay 
806d64ed03dSBarry Smith   PetscFunctionBegin;
807d6de1c52SSatish Balay   if (baij->size == 1) {
808d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
809d6de1c52SSatish Balay   } else {
810d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
811d6de1c52SSatish Balay       v = amat->a;
812d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
813aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
814329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
815d6de1c52SSatish Balay #else
816d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
817d6de1c52SSatish Balay #endif
818d6de1c52SSatish Balay       }
819d6de1c52SSatish Balay       v = bmat->a;
820d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
821aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
822329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
823d6de1c52SSatish Balay #else
824d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
825d6de1c52SSatish Balay #endif
826d6de1c52SSatish Balay       }
827ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
828d6de1c52SSatish Balay       *norm = sqrt(*norm);
829d64ed03dSBarry Smith     } else {
83029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
831d6de1c52SSatish Balay     }
832d64ed03dSBarry Smith   }
8333a40ed3dSBarry Smith   PetscFunctionReturn(0);
834d6de1c52SSatish Balay }
83557b952d6SSatish Balay 
836bd7f49f5SSatish Balay 
837fef45726SSatish Balay /*
838fef45726SSatish Balay   Creates the hash table, and sets the table
839fef45726SSatish Balay   This table is created only once.
840fef45726SSatish Balay   If new entried need to be added to the matrix
841fef45726SSatish Balay   then the hash table has to be destroyed and
842fef45726SSatish Balay   recreated.
843fef45726SSatish Balay */
844fef45726SSatish Balay #undef __FUNC__
845b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
846329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
847596b8d2eSBarry Smith {
848596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
850596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8510bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
852549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
853187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
854fef45726SSatish Balay   int         *HT,key;
8553eda8832SBarry Smith   MatScalar   **HD;
856329f5518SBarry Smith   PetscReal   tmp;
857aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8584a15367fSSatish Balay   int         ct=0,max=0;
8594a15367fSSatish Balay #endif
860fef45726SSatish Balay 
861d64ed03dSBarry Smith   PetscFunctionBegin;
8620bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8630bdbc534SSatish Balay   size = baij->ht_size;
864fef45726SSatish Balay 
8650bdbc534SSatish Balay   if (baij->ht) {
8660bdbc534SSatish Balay     PetscFunctionReturn(0);
867596b8d2eSBarry Smith   }
8680bdbc534SSatish Balay 
869fef45726SSatish Balay   /* Allocate Memory for Hash Table */
8703eda8832SBarry Smith   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
871b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
872b9e4cc15SSatish Balay   HD = baij->hd;
873a07cd24cSSatish Balay   HT = baij->ht;
874b9e4cc15SSatish Balay 
875b9e4cc15SSatish Balay 
876549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
8770bdbc534SSatish Balay 
878596b8d2eSBarry Smith 
879596b8d2eSBarry Smith   /* Loop Over A */
8800bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
881596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8820bdbc534SSatish Balay       row = i+rstart;
8830bdbc534SSatish Balay       col = aj[j]+cstart;
884596b8d2eSBarry Smith 
885187ce0cbSSatish Balay       key = row*Nbs + col + 1;
886c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8870bdbc534SSatish Balay       for (k=0; k<size; k++){
8880bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8890bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8900bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
891596b8d2eSBarry Smith           break;
892aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
893187ce0cbSSatish Balay         } else {
894187ce0cbSSatish Balay           ct++;
895187ce0cbSSatish Balay #endif
896596b8d2eSBarry Smith         }
897187ce0cbSSatish Balay       }
898aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
899187ce0cbSSatish Balay       if (k> max) max = k;
900187ce0cbSSatish Balay #endif
901596b8d2eSBarry Smith     }
902596b8d2eSBarry Smith   }
903596b8d2eSBarry Smith   /* Loop Over B */
9040bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
905596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9060bdbc534SSatish Balay       row = i+rstart;
9070bdbc534SSatish Balay       col = garray[bj[j]];
908187ce0cbSSatish Balay       key = row*Nbs + col + 1;
909c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9100bdbc534SSatish Balay       for (k=0; k<size; k++){
9110bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
9120bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9130bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
914596b8d2eSBarry Smith           break;
915aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
916187ce0cbSSatish Balay         } else {
917187ce0cbSSatish Balay           ct++;
918187ce0cbSSatish Balay #endif
919596b8d2eSBarry Smith         }
920187ce0cbSSatish Balay       }
921aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
922187ce0cbSSatish Balay       if (k> max) max = k;
923187ce0cbSSatish Balay #endif
924596b8d2eSBarry Smith     }
925596b8d2eSBarry Smith   }
926596b8d2eSBarry Smith 
927596b8d2eSBarry Smith   /* Print Summary */
928aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
929c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
930596b8d2eSBarry Smith     if (HT[i]) {j++;}
931c38d4ed2SBarry Smith   }
932329f5518SBarry Smith   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
933187ce0cbSSatish Balay #endif
9343a40ed3dSBarry Smith   PetscFunctionReturn(0);
935596b8d2eSBarry Smith }
93657b952d6SSatish Balay 
937bbb85fb3SSatish Balay #undef __FUNC__
938b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
939bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
940bbb85fb3SSatish Balay {
941bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
942ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
943bbb85fb3SSatish Balay   InsertMode  addv;
944bbb85fb3SSatish Balay 
945bbb85fb3SSatish Balay   PetscFunctionBegin;
946bbb85fb3SSatish Balay   if (baij->donotstash) {
947bbb85fb3SSatish Balay     PetscFunctionReturn(0);
948bbb85fb3SSatish Balay   }
949bbb85fb3SSatish Balay 
950bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
951bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
952bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95329bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
954bbb85fb3SSatish Balay   }
955bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
956bbb85fb3SSatish Balay 
9578798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9588798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9598798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
9605a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
9618798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
9625a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
963bbb85fb3SSatish Balay   PetscFunctionReturn(0);
964bbb85fb3SSatish Balay }
965bbb85fb3SSatish Balay 
966bbb85fb3SSatish Balay #undef __FUNC__
967b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
968bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
969bbb85fb3SSatish Balay {
970bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
971a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
972a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9737c922b88SBarry Smith   int         *row,*col,other_disassembled;
9747c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9753eda8832SBarry Smith   MatScalar   *val;
976bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
977bbb85fb3SSatish Balay 
978bbb85fb3SSatish Balay   PetscFunctionBegin;
979bbb85fb3SSatish Balay   if (!baij->donotstash) {
980a2d1c673SSatish Balay     while (1) {
9818798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
982a2d1c673SSatish Balay       if (!flg) break;
983a2d1c673SSatish Balay 
984bbb85fb3SSatish Balay       for (i=0; i<n;) {
985bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
986bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
987bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
988bbb85fb3SSatish Balay         else       ncols = n-i;
989bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99093fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
991bbb85fb3SSatish Balay         i = j;
992bbb85fb3SSatish Balay       }
993bbb85fb3SSatish Balay     }
9948798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
995a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
996a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
997a2d1c673SSatish Balay        restore the original flags */
998a2d1c673SSatish Balay     r1 = baij->roworiented;
999a2d1c673SSatish Balay     r2 = a->roworiented;
1000a2d1c673SSatish Balay     r3 = b->roworiented;
10017c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10027c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10037c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1004a2d1c673SSatish Balay     while (1) {
10058798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1006a2d1c673SSatish Balay       if (!flg) break;
1007a2d1c673SSatish Balay 
1008a2d1c673SSatish Balay       for (i=0; i<n;) {
1009a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1010a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1011a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1012a2d1c673SSatish Balay         else       ncols = n-i;
101393fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1014a2d1c673SSatish Balay         i = j;
1015a2d1c673SSatish Balay       }
1016a2d1c673SSatish Balay     }
10178798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1018a2d1c673SSatish Balay     baij->roworiented = r1;
1019a2d1c673SSatish Balay     a->roworiented    = r2;
1020a2d1c673SSatish Balay     b->roworiented    = r3;
1021bbb85fb3SSatish Balay   }
1022bbb85fb3SSatish Balay 
1023bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1024bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1025bbb85fb3SSatish Balay 
1026bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1027bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1028bbb85fb3SSatish Balay   /*
1029bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1030bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1031bbb85fb3SSatish Balay   */
1032bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1033bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1034bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1035bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1036bbb85fb3SSatish Balay     }
1037bbb85fb3SSatish Balay   }
1038bbb85fb3SSatish Balay 
1039bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1040bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1041bbb85fb3SSatish Balay   }
1042bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1043bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1044bbb85fb3SSatish Balay 
1045aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1046bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1047c38d4ed2SBarry Smith     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1048bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1049bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1050bbb85fb3SSatish Balay   }
1051bbb85fb3SSatish Balay #endif
1052bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1053bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1054bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1055bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1056bbb85fb3SSatish Balay   }
1057bbb85fb3SSatish Balay 
1058606d414cSSatish Balay   if (baij->rowvalues) {
1059606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1060606d414cSSatish Balay     baij->rowvalues = 0;
1061606d414cSSatish Balay   }
1062bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1063bbb85fb3SSatish Balay }
106457b952d6SSatish Balay 
10655615d1e5SSatish Balay #undef __FUNC__
1066b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
10677b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
106857b952d6SSatish Balay {
106957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
107077ed5343SBarry Smith   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
10716831982aSBarry Smith   PetscTruth   isascii,isdraw;
1072f14a1c24SBarry Smith   Viewer       sviewer;
107357b952d6SSatish Balay 
1074d64ed03dSBarry Smith   PetscFunctionBegin;
10756831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
10766831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10770f5bd95cSBarry Smith   if (isascii) {
1078d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1079639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
10804e220ebcSLois Curfman McInnes       MatInfo info;
1081d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1082d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
10836831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
10844e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10856831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1086d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
10876831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1088d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
10896831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
10906831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
109157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10923a40ed3dSBarry Smith       PetscFunctionReturn(0);
1093d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
10946831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
10953a40ed3dSBarry Smith       PetscFunctionReturn(0);
109657b952d6SSatish Balay     }
109757b952d6SSatish Balay   }
109857b952d6SSatish Balay 
10990f5bd95cSBarry Smith   if (isdraw) {
110057b952d6SSatish Balay     Draw       draw;
110157b952d6SSatish Balay     PetscTruth isnull;
110277ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
11033a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
110457b952d6SSatish Balay   }
110557b952d6SSatish Balay 
110657b952d6SSatish Balay   if (size == 1) {
110757b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1108d64ed03dSBarry Smith   } else {
110957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
111057b952d6SSatish Balay     Mat         A;
111157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
11123eda8832SBarry Smith     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11133eda8832SBarry Smith     MatScalar   *a;
111457b952d6SSatish Balay 
111557b952d6SSatish Balay     if (!rank) {
111655843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1117d64ed03dSBarry Smith     } else {
111855843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
111957b952d6SSatish Balay     }
112057b952d6SSatish Balay     PLogObjectParent(mat,A);
112157b952d6SSatish Balay 
112257b952d6SSatish Balay     /* copy over the A part */
112357b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
112457b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
112557b952d6SSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
112657b952d6SSatish Balay 
112757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
112857b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
112957b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
113057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
113157b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
113257b952d6SSatish Balay         for (k=0; k<bs; k++) {
113393fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1134cee3aa6bSSatish Balay           col++; a += bs;
113557b952d6SSatish Balay         }
113657b952d6SSatish Balay       }
113757b952d6SSatish Balay     }
113857b952d6SSatish Balay     /* copy over the B part */
113957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
114057b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
114157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
114257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
114357b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
114457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
114557b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
114657b952d6SSatish Balay         for (k=0; k<bs; k++) {
114793fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1148cee3aa6bSSatish Balay           col++; a += bs;
114957b952d6SSatish Balay         }
115057b952d6SSatish Balay       }
115157b952d6SSatish Balay     }
1152606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11536d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11546d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
115555843e3eSBarry Smith     /*
115655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
115755843e3eSBarry Smith        synchronized across all processors that share the Draw object
115855843e3eSBarry Smith     */
11596831982aSBarry Smith     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1160f14a1c24SBarry Smith     if (!rank) {
11616831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
116257b952d6SSatish Balay     }
1163f14a1c24SBarry Smith     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
116457b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
116557b952d6SSatish Balay   }
11663a40ed3dSBarry Smith   PetscFunctionReturn(0);
116757b952d6SSatish Balay }
116857b952d6SSatish Balay 
11695615d1e5SSatish Balay #undef __FUNC__
1170b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1171e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
117257b952d6SSatish Balay {
117357b952d6SSatish Balay   int        ierr;
11746831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
117557b952d6SSatish Balay 
1176d64ed03dSBarry Smith   PetscFunctionBegin;
11776831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
11786831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
11796831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
11806831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
11810f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11827b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11835cd90555SBarry Smith   } else {
118429bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
118557b952d6SSatish Balay   }
11863a40ed3dSBarry Smith   PetscFunctionReturn(0);
118757b952d6SSatish Balay }
118857b952d6SSatish Balay 
11895615d1e5SSatish Balay #undef __FUNC__
1190b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1191e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
119279bdfe76SSatish Balay {
119379bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
119479bdfe76SSatish Balay   int         ierr;
119579bdfe76SSatish Balay 
1196d64ed03dSBarry Smith   PetscFunctionBegin;
119798dd23e9SBarry Smith 
119898dd23e9SBarry Smith   if (mat->mapping) {
119998dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
120098dd23e9SBarry Smith   }
120198dd23e9SBarry Smith   if (mat->bmapping) {
120298dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
120398dd23e9SBarry Smith   }
120461b13de0SBarry Smith   if (mat->rmap) {
120561b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
120661b13de0SBarry Smith   }
120761b13de0SBarry Smith   if (mat->cmap) {
120861b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
120961b13de0SBarry Smith   }
1210aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1211e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
121279bdfe76SSatish Balay #endif
121379bdfe76SSatish Balay 
12148798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12158798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
12168798bf22SSatish Balay 
1217606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
121879bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
121979bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1220aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12210f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
122248e59246SSatish Balay #else
1223606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
122448e59246SSatish Balay #endif
1225606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1226606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1227606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1228606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1229606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1230606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12316fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12326fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12336fa18ffdSBarry Smith #endif
1234606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
123579bdfe76SSatish Balay   PLogObjectDestroy(mat);
123679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
12373a40ed3dSBarry Smith   PetscFunctionReturn(0);
123879bdfe76SSatish Balay }
123979bdfe76SSatish Balay 
12405615d1e5SSatish Balay #undef __FUNC__
1241b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1242ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243cee3aa6bSSatish Balay {
1244cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
124547b4a8eaSLois Curfman McInnes   int         ierr,nt;
1246cee3aa6bSSatish Balay 
1247d64ed03dSBarry Smith   PetscFunctionBegin;
1248e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
124947b4a8eaSLois Curfman McInnes   if (nt != a->n) {
125029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
125147b4a8eaSLois Curfman McInnes   }
1252e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
125347b4a8eaSLois Curfman McInnes   if (nt != a->m) {
125429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
125547b4a8eaSLois Curfman McInnes   }
125643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1257f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
125843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1259f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
126043a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12613a40ed3dSBarry Smith   PetscFunctionReturn(0);
1262cee3aa6bSSatish Balay }
1263cee3aa6bSSatish Balay 
12645615d1e5SSatish Balay #undef __FUNC__
1265b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1266ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1267cee3aa6bSSatish Balay {
1268cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1269cee3aa6bSSatish Balay   int        ierr;
1270d64ed03dSBarry Smith 
1271d64ed03dSBarry Smith   PetscFunctionBegin;
127243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
127443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1275f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1277cee3aa6bSSatish Balay }
1278cee3aa6bSSatish Balay 
12795615d1e5SSatish Balay #undef __FUNC__
1280b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
12817c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1282cee3aa6bSSatish Balay {
1283cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1284cee3aa6bSSatish Balay   int         ierr;
1285cee3aa6bSSatish Balay 
1286d64ed03dSBarry Smith   PetscFunctionBegin;
1287cee3aa6bSSatish Balay   /* do nondiagonal part */
12887c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1289cee3aa6bSSatish Balay   /* send it on its way */
1290537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1291cee3aa6bSSatish Balay   /* do local part */
12927c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1293cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1294cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1295cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1296639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12973a40ed3dSBarry Smith   PetscFunctionReturn(0);
1298cee3aa6bSSatish Balay }
1299cee3aa6bSSatish Balay 
13005615d1e5SSatish Balay #undef __FUNC__
1301b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
13027c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1303cee3aa6bSSatish Balay {
1304cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1305cee3aa6bSSatish Balay   int         ierr;
1306cee3aa6bSSatish Balay 
1307d64ed03dSBarry Smith   PetscFunctionBegin;
1308cee3aa6bSSatish Balay   /* do nondiagonal part */
13097c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1310cee3aa6bSSatish Balay   /* send it on its way */
1311537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1312cee3aa6bSSatish Balay   /* do local part */
13137c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1314cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1315cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1316cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1317537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13183a40ed3dSBarry Smith   PetscFunctionReturn(0);
1319cee3aa6bSSatish Balay }
1320cee3aa6bSSatish Balay 
1321cee3aa6bSSatish Balay /*
1322cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1323cee3aa6bSSatish Balay    diagonal block
1324cee3aa6bSSatish Balay */
13255615d1e5SSatish Balay #undef __FUNC__
1326b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1327ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1328cee3aa6bSSatish Balay {
1329cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13303a40ed3dSBarry Smith   int         ierr;
1331d64ed03dSBarry Smith 
1332d64ed03dSBarry Smith   PetscFunctionBegin;
133329bbc08cSBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13343a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1336cee3aa6bSSatish Balay }
1337cee3aa6bSSatish Balay 
13385615d1e5SSatish Balay #undef __FUNC__
1339b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1340ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1341cee3aa6bSSatish Balay {
1342cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1343cee3aa6bSSatish Balay   int         ierr;
1344d64ed03dSBarry Smith 
1345d64ed03dSBarry Smith   PetscFunctionBegin;
1346cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1347cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1349cee3aa6bSSatish Balay }
1350026e39d0SSatish Balay 
13515615d1e5SSatish Balay #undef __FUNC__
1352b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1353ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
135457b952d6SSatish Balay {
135557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1356d64ed03dSBarry Smith 
1357d64ed03dSBarry Smith   PetscFunctionBegin;
1358a21fb8cbSBarry Smith   if (m) *m = mat->rstart*mat->bs;
1359a21fb8cbSBarry Smith   if (n) *n = mat->rend*mat->bs;
13603a40ed3dSBarry Smith   PetscFunctionReturn(0);
136157b952d6SSatish Balay }
136257b952d6SSatish Balay 
13635615d1e5SSatish Balay #undef __FUNC__
1364b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1365acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1366acdf5bf4SSatish Balay {
1367acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1368acdf5bf4SSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1369acdf5bf4SSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1370d9d09a02SSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1371d9d09a02SSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1372acdf5bf4SSatish Balay 
1373d64ed03dSBarry Smith   PetscFunctionBegin;
137429bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1375acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1376acdf5bf4SSatish Balay 
1377acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1378acdf5bf4SSatish Balay     /*
1379acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1380acdf5bf4SSatish Balay     */
1381acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1382bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1383bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1384acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1385acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1386acdf5bf4SSatish Balay     }
1387549d3d68SSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1388acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1389acdf5bf4SSatish Balay   }
1390acdf5bf4SSatish Balay 
139129bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1392d9d09a02SSatish Balay   lrow = row - brstart;
1393acdf5bf4SSatish Balay 
1394acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1395acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1396acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1397f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1398f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1399acdf5bf4SSatish Balay   nztot = nzA + nzB;
1400acdf5bf4SSatish Balay 
1401acdf5bf4SSatish Balay   cmap  = mat->garray;
1402acdf5bf4SSatish Balay   if (v  || idx) {
1403acdf5bf4SSatish Balay     if (nztot) {
1404acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1405acdf5bf4SSatish Balay       int imark = -1;
1406acdf5bf4SSatish Balay       if (v) {
1407acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1408acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1409d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1410acdf5bf4SSatish Balay           else break;
1411acdf5bf4SSatish Balay         }
1412acdf5bf4SSatish Balay         imark = i;
1413acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1414acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1415acdf5bf4SSatish Balay       }
1416acdf5bf4SSatish Balay       if (idx) {
1417acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1418acdf5bf4SSatish Balay         if (imark > -1) {
1419acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1420bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1421acdf5bf4SSatish Balay           }
1422acdf5bf4SSatish Balay         } else {
1423acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1424d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1425d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1426acdf5bf4SSatish Balay             else break;
1427acdf5bf4SSatish Balay           }
1428acdf5bf4SSatish Balay           imark = i;
1429acdf5bf4SSatish Balay         }
1430d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1431d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1432acdf5bf4SSatish Balay       }
1433d64ed03dSBarry Smith     } else {
1434d212a18eSSatish Balay       if (idx) *idx = 0;
1435d212a18eSSatish Balay       if (v)   *v   = 0;
1436d212a18eSSatish Balay     }
1437acdf5bf4SSatish Balay   }
1438acdf5bf4SSatish Balay   *nz = nztot;
1439f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1440f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14413a40ed3dSBarry Smith   PetscFunctionReturn(0);
1442acdf5bf4SSatish Balay }
1443acdf5bf4SSatish Balay 
14445615d1e5SSatish Balay #undef __FUNC__
1445b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1446acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1447acdf5bf4SSatish Balay {
1448acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1449d64ed03dSBarry Smith 
1450d64ed03dSBarry Smith   PetscFunctionBegin;
1451acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
145229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1453acdf5bf4SSatish Balay   }
1454acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14553a40ed3dSBarry Smith   PetscFunctionReturn(0);
1456acdf5bf4SSatish Balay }
1457acdf5bf4SSatish Balay 
14585615d1e5SSatish Balay #undef __FUNC__
1459b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1460ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14615a838052SSatish Balay {
14625a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1463d64ed03dSBarry Smith 
1464d64ed03dSBarry Smith   PetscFunctionBegin;
14655a838052SSatish Balay   *bs = baij->bs;
14663a40ed3dSBarry Smith   PetscFunctionReturn(0);
14675a838052SSatish Balay }
14685a838052SSatish Balay 
14695615d1e5SSatish Balay #undef __FUNC__
1470b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1471ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
147258667388SSatish Balay {
147358667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
147458667388SSatish Balay   int         ierr;
1475d64ed03dSBarry Smith 
1476d64ed03dSBarry Smith   PetscFunctionBegin;
147758667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
147858667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14793a40ed3dSBarry Smith   PetscFunctionReturn(0);
148058667388SSatish Balay }
14810ac07820SSatish Balay 
14825615d1e5SSatish Balay #undef __FUNC__
1483b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1484ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14850ac07820SSatish Balay {
14864e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14874e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14887d57db60SLois Curfman McInnes   int         ierr;
1489329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14900ac07820SSatish Balay 
1491d64ed03dSBarry Smith   PetscFunctionBegin;
14924e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14934e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14940e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1495de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14964e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14970e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1498de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14990ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15004e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15014e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15024e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15034e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15044e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15050ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1506f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
15074e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15084e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15094e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15104e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15114e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15120ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1513f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
15144e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15154e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15164e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15174e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15184e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1519d41123aaSBarry Smith   } else {
152029bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15210ac07820SSatish Balay   }
15225a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
15235a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
15245a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
15255a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
15264e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15274e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15284e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15293a40ed3dSBarry Smith   PetscFunctionReturn(0);
15300ac07820SSatish Balay }
15310ac07820SSatish Balay 
15325615d1e5SSatish Balay #undef __FUNC__
1533b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1534ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
153558667388SSatish Balay {
153658667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
153798305bb5SBarry Smith   int         ierr;
153858667388SSatish Balay 
1539d64ed03dSBarry Smith   PetscFunctionBegin;
154058667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
154158667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
15426da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1543c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
15444787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
15457c922b88SBarry Smith       op == MAT_KEEP_ZEROED_ROWS ||
15464787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
154798305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154898305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1549b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
15507c922b88SBarry Smith         a->roworiented = PETSC_TRUE;
155198305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155298305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1553b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
15546da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
155558667388SSatish Balay              op == MAT_SYMMETRIC ||
155658667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1557b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
155898305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
155958667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
156098305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
15617c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
156298305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
156398305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
15642b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
15657c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
1566d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
156729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1568133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
15697c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
1570d64ed03dSBarry Smith   } else {
157129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1572d64ed03dSBarry Smith   }
15733a40ed3dSBarry Smith   PetscFunctionReturn(0);
157458667388SSatish Balay }
157558667388SSatish Balay 
15765615d1e5SSatish Balay #undef __FUNC__
1577b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1578ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15790ac07820SSatish Balay {
15800ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15810ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15820ac07820SSatish Balay   Mat         B;
158340011551SBarry Smith   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15840ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15853eda8832SBarry Smith   MatScalar   *a;
15860ac07820SSatish Balay 
1587d64ed03dSBarry Smith   PetscFunctionBegin;
158829bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
15897c922b88SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15900ac07820SSatish Balay 
15910ac07820SSatish Balay   /* copy over the A part */
15920ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15930ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15940ac07820SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
15950ac07820SSatish Balay 
15960ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15970ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15980ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15990ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16000ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16010ac07820SSatish Balay       for (k=0; k<bs; k++) {
160293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16030ac07820SSatish Balay         col++; a += bs;
16040ac07820SSatish Balay       }
16050ac07820SSatish Balay     }
16060ac07820SSatish Balay   }
16070ac07820SSatish Balay   /* copy over the B part */
16080ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16090ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16100ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16110ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16120ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16130ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16140ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16150ac07820SSatish Balay       for (k=0; k<bs; k++) {
161693fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16170ac07820SSatish Balay         col++; a += bs;
16180ac07820SSatish Balay       }
16190ac07820SSatish Balay     }
16200ac07820SSatish Balay   }
1621606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16220ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16230ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16240ac07820SSatish Balay 
16257c922b88SBarry Smith   if (matout) {
16260ac07820SSatish Balay     *matout = B;
16270ac07820SSatish Balay   } else {
1628f830108cSBarry Smith     PetscOps *Abops;
1629cc2dc46cSBarry Smith     MatOps   Aops;
1630f830108cSBarry Smith 
16310ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1632606d414cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
16330ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
16340ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1635aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
16360f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1637b1fc9764SSatish Balay #else
1638606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1639b1fc9764SSatish Balay #endif
1640606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1641606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1642606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1643606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1644f830108cSBarry Smith 
1645f830108cSBarry Smith     /*
1646f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1647f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1648f830108cSBarry Smith       else from C.
1649f830108cSBarry Smith     */
1650f830108cSBarry Smith     Abops   = A->bops;
1651f830108cSBarry Smith     Aops    = A->ops;
1652549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1653f830108cSBarry Smith     A->bops = Abops;
1654f830108cSBarry Smith     A->ops  = Aops;
1655f830108cSBarry Smith 
16560ac07820SSatish Balay     PetscHeaderDestroy(B);
16570ac07820SSatish Balay   }
16583a40ed3dSBarry Smith   PetscFunctionReturn(0);
16590ac07820SSatish Balay }
16600e95ebc0SSatish Balay 
16615615d1e5SSatish Balay #undef __FUNC__
1662b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
166336c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16640e95ebc0SSatish Balay {
166536c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
166636c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16670e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16680e95ebc0SSatish Balay 
1669d64ed03dSBarry Smith   PetscFunctionBegin;
167036c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
167136c4a09eSSatish Balay   if (rr) {
167236c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
167329bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
167436c4a09eSSatish Balay     /* Overlap communication with computation. */
167536c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
167636c4a09eSSatish Balay   }
16770e95ebc0SSatish Balay   if (ll) {
16780e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
167929bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1680a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16810e95ebc0SSatish Balay   }
168236c4a09eSSatish Balay   /* scale  the diagonal block */
168336c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
168436c4a09eSSatish Balay 
168536c4a09eSSatish Balay   if (rr) {
168636c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
168736c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1688a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
168936c4a09eSSatish Balay   }
169036c4a09eSSatish Balay 
16913a40ed3dSBarry Smith   PetscFunctionReturn(0);
16920e95ebc0SSatish Balay }
16930e95ebc0SSatish Balay 
16945615d1e5SSatish Balay #undef __FUNC__
1695b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1696ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16970ac07820SSatish Balay {
16980ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16990ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1700a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
17010ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
17020ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1703a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
17040ac07820SSatish Balay   MPI_Comm       comm = A->comm;
17050ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17060ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17070ac07820SSatish Balay   IS             istmp;
17080ac07820SSatish Balay 
1709d64ed03dSBarry Smith   PetscFunctionBegin;
1710f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
17110ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
17120ac07820SSatish Balay 
17130ac07820SSatish Balay   /*  first count number of contributors to each processor */
17140ac07820SSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1715549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1716549d3d68SSatish Balay   procs  = nprocs + size;
17170ac07820SSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
17180ac07820SSatish Balay   for (i=0; i<N; i++) {
17190ac07820SSatish Balay     idx   = rows[i];
17200ac07820SSatish Balay     found = 0;
17210ac07820SSatish Balay     for (j=0; j<size; j++) {
17220ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17230ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
17240ac07820SSatish Balay       }
17250ac07820SSatish Balay     }
172629bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17270ac07820SSatish Balay   }
17280ac07820SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
17290ac07820SSatish Balay 
17300ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
17316831982aSBarry Smith   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
17326831982aSBarry Smith   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
17330ac07820SSatish Balay   nmax   = work[rank];
17346831982aSBarry Smith   nrecvs = work[size+rank];
1735606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
17360ac07820SSatish Balay 
17370ac07820SSatish Balay   /* post receives:   */
1738d64ed03dSBarry Smith   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1739d64ed03dSBarry Smith   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
17400ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1741ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17420ac07820SSatish Balay   }
17430ac07820SSatish Balay 
17440ac07820SSatish Balay   /* do sends:
17450ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17460ac07820SSatish Balay      the ith processor
17470ac07820SSatish Balay   */
17480ac07820SSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1749ca161407SBarry Smith   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
17500ac07820SSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
17510ac07820SSatish Balay   starts[0]  = 0;
17520ac07820SSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17530ac07820SSatish Balay   for (i=0; i<N; i++) {
17540ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17550ac07820SSatish Balay   }
17566831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17570ac07820SSatish Balay 
17580ac07820SSatish Balay   starts[0] = 0;
17590ac07820SSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17600ac07820SSatish Balay   count = 0;
17610ac07820SSatish Balay   for (i=0; i<size; i++) {
17620ac07820SSatish Balay     if (procs[i]) {
1763ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17640ac07820SSatish Balay     }
17650ac07820SSatish Balay   }
1766606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17670ac07820SSatish Balay 
17680ac07820SSatish Balay   base = owners[rank]*bs;
17690ac07820SSatish Balay 
17700ac07820SSatish Balay   /*  wait on receives */
17710ac07820SSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
17720ac07820SSatish Balay   source = lens + nrecvs;
17730ac07820SSatish Balay   count  = nrecvs; slen = 0;
17740ac07820SSatish Balay   while (count) {
1775ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17760ac07820SSatish Balay     /* unpack receives into our local space */
1777ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17780ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17790ac07820SSatish Balay     lens[imdex]    = n;
17800ac07820SSatish Balay     slen          += n;
17810ac07820SSatish Balay     count--;
17820ac07820SSatish Balay   }
1783606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17840ac07820SSatish Balay 
17850ac07820SSatish Balay   /* move the data into the send scatter */
17860ac07820SSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
17870ac07820SSatish Balay   count = 0;
17880ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17890ac07820SSatish Balay     values = rvalues + i*nmax;
17900ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17910ac07820SSatish Balay       lrows[count++] = values[j] - base;
17920ac07820SSatish Balay     }
17930ac07820SSatish Balay   }
1794606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1795606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1796606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1797606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17980ac07820SSatish Balay 
17990ac07820SSatish Balay   /* actually zap the local rows */
1800029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
18010ac07820SSatish Balay   PLogObjectParent(A,istmp);
1802a07cd24cSSatish Balay 
180372dacd9aSBarry Smith   /*
180472dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
180572dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
180672dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
180772dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
180872dacd9aSBarry Smith 
180972dacd9aSBarry Smith        Contributed by: Mathew Knepley
181072dacd9aSBarry Smith   */
18119c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
18126fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
18139c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
18146fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
18159c957beeSSatish Balay   } else if (diag) {
18166fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1817fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
181829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1819fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18206525c446SSatish Balay     }
1821a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1822a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
18233f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1824a07cd24cSSatish Balay     }
1825a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18279c957beeSSatish Balay   } else {
18286fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1829a07cd24cSSatish Balay   }
18309c957beeSSatish Balay 
18319c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1832606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1833a07cd24cSSatish Balay 
18340ac07820SSatish Balay   /* wait on sends */
18350ac07820SSatish Balay   if (nsends) {
1836d64ed03dSBarry Smith     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1837ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1838606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
18390ac07820SSatish Balay   }
1840606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1841606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18420ac07820SSatish Balay 
18433a40ed3dSBarry Smith   PetscFunctionReturn(0);
18440ac07820SSatish Balay }
184572dacd9aSBarry Smith 
18465615d1e5SSatish Balay #undef __FUNC__
1847b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1848ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1849ba4ca20aSSatish Balay {
1850ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
185125fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1852133cdb44SSatish Balay   static int  called = 0;
18533a40ed3dSBarry Smith   int         ierr;
1854ba4ca20aSSatish Balay 
1855d64ed03dSBarry Smith   PetscFunctionBegin;
18563a40ed3dSBarry Smith   if (!a->rank) {
18573a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
185825fdafccSSatish Balay   }
185925fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1860d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1861d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1863ba4ca20aSSatish Balay }
18640ac07820SSatish Balay 
18655615d1e5SSatish Balay #undef __FUNC__
1866b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1867ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1868bb5a7306SBarry Smith {
1869bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1870bb5a7306SBarry Smith   int         ierr;
1871d64ed03dSBarry Smith 
1872d64ed03dSBarry Smith   PetscFunctionBegin;
1873bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18743a40ed3dSBarry Smith   PetscFunctionReturn(0);
1875bb5a7306SBarry Smith }
1876bb5a7306SBarry Smith 
18772e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18780ac07820SSatish Balay 
18797fc3c18eSBarry Smith #undef __FUNC__
1880b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
18817fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18827fc3c18eSBarry Smith {
18837fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18847fc3c18eSBarry Smith   Mat         a,b,c,d;
18857fc3c18eSBarry Smith   PetscTruth  flg;
18867fc3c18eSBarry Smith   int         ierr;
18877fc3c18eSBarry Smith 
18887fc3c18eSBarry Smith   PetscFunctionBegin;
188929bbc08cSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
18907fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18917fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18927fc3c18eSBarry Smith 
18937fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18947fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18957fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18967fc3c18eSBarry Smith   }
18977fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18987fc3c18eSBarry Smith   PetscFunctionReturn(0);
18997fc3c18eSBarry Smith }
19007fc3c18eSBarry Smith 
190179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1902cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1903cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1904cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1905cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1906cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1907cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
19087c922b88SBarry Smith   MatMultTranspose_MPIBAIJ,
19097c922b88SBarry Smith   MatMultTransposeAdd_MPIBAIJ,
1910cc2dc46cSBarry Smith   0,
1911cc2dc46cSBarry Smith   0,
1912cc2dc46cSBarry Smith   0,
1913cc2dc46cSBarry Smith   0,
1914cc2dc46cSBarry Smith   0,
1915cc2dc46cSBarry Smith   0,
1916cc2dc46cSBarry Smith   0,
1917cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1918cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
19197fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1920cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1921cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1922cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1923cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1924cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1925cc2dc46cSBarry Smith   0,
1926cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1927cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1928cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1929cc2dc46cSBarry Smith   0,
1930cc2dc46cSBarry Smith   0,
1931cc2dc46cSBarry Smith   0,
1932cc2dc46cSBarry Smith   0,
1933cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1934cc2dc46cSBarry Smith   0,
1935cc2dc46cSBarry Smith   0,
1936cc2dc46cSBarry Smith   0,
1937cc2dc46cSBarry Smith   0,
19382e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1939cc2dc46cSBarry Smith   0,
1940cc2dc46cSBarry Smith   0,
1941cc2dc46cSBarry Smith   0,
1942cc2dc46cSBarry Smith   0,
1943cc2dc46cSBarry Smith   0,
1944cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1945cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1946cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1947cc2dc46cSBarry Smith   0,
1948cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1949cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1950cc2dc46cSBarry Smith   0,
1951cc2dc46cSBarry Smith   0,
1952cc2dc46cSBarry Smith   0,
1953cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1954cc2dc46cSBarry Smith   0,
1955cc2dc46cSBarry Smith   0,
1956cc2dc46cSBarry Smith   0,
1957cc2dc46cSBarry Smith   0,
1958cc2dc46cSBarry Smith   0,
1959cc2dc46cSBarry Smith   0,
1960cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1961cc2dc46cSBarry Smith   0,
1962cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1963cc2dc46cSBarry Smith   0,
1964f14a1c24SBarry Smith   MatDestroy_MPIBAIJ,
1965f14a1c24SBarry Smith   MatView_MPIBAIJ,
1966*7843d17aSBarry Smith   MatGetMaps_Petsc,
1967*7843d17aSBarry Smith   0,
1968*7843d17aSBarry Smith   0,
1969*7843d17aSBarry Smith   0,
1970*7843d17aSBarry Smith   0,
1971*7843d17aSBarry Smith   0,
1972*7843d17aSBarry Smith   0,
1973*7843d17aSBarry Smith   MatGetRowMax_MPIBAIJ};
197479bdfe76SSatish Balay 
19755ef9f2a5SBarry Smith 
1976e18c124aSSatish Balay EXTERN_C_BEGIN
19775ef9f2a5SBarry Smith #undef __FUNC__
1978b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
19795ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19805ef9f2a5SBarry Smith {
19815ef9f2a5SBarry Smith   PetscFunctionBegin;
19825ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19835ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19845ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19855ef9f2a5SBarry Smith }
1986e18c124aSSatish Balay EXTERN_C_END
198779bdfe76SSatish Balay 
19885615d1e5SSatish Balay #undef __FUNC__
1989b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
199079bdfe76SSatish Balay /*@C
199179bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
199279bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
199379bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
199479bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
199579bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
199679bdfe76SSatish Balay 
1997db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1998db81eaa0SLois Curfman McInnes 
199979bdfe76SSatish Balay    Input Parameters:
2000db81eaa0SLois Curfman McInnes +  comm - MPI communicator
200179bdfe76SSatish Balay .  bs   - size of blockk
200279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
200392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
200492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
200592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
200692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
200792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2008be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2009be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
201079bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
201179bdfe76SSatish Balay            submatrix  (same for all local rows)
20127fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
201392e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2014db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
201592e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
201679bdfe76SSatish Balay            submatrix (same for all local rows).
20177fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
201892e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
201992e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
202079bdfe76SSatish Balay 
202179bdfe76SSatish Balay    Output Parameter:
202279bdfe76SSatish Balay .  A - the matrix
202379bdfe76SSatish Balay 
2024db81eaa0SLois Curfman McInnes    Options Database Keys:
2025db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2026db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2027db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
2028494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
2029494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
20303ffaccefSLois Curfman McInnes 
2031b259b22eSLois Curfman McInnes    Notes:
203279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
203379bdfe76SSatish Balay    (possibly both).
203479bdfe76SSatish Balay 
2035be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2036be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2037be79a94dSBarry Smith 
203879bdfe76SSatish Balay    Storage Information:
203979bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
204079bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
204179bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
204279bdfe76SSatish Balay    local matrix (a rectangular submatrix).
204379bdfe76SSatish Balay 
204479bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
204579bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
204679bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
204779bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
204879bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
204979bdfe76SSatish Balay 
205079bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
205179bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
205279bdfe76SSatish Balay 
2053db81eaa0SLois Curfman McInnes .vb
2054db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2055db81eaa0SLois Curfman McInnes           -------------------
2056db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2057db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2058db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2059db81eaa0SLois Curfman McInnes           -------------------
2060db81eaa0SLois Curfman McInnes .ve
206179bdfe76SSatish Balay 
206279bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
206379bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
206479bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
206557b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
206679bdfe76SSatish Balay 
2067d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2068d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
206979bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
207092e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
207192e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
20726da5968aSLois Curfman McInnes    matrices.
207379bdfe76SSatish Balay 
2074027ccd11SLois Curfman McInnes    Level: intermediate
2075027ccd11SLois Curfman McInnes 
207692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
207779bdfe76SSatish Balay 
20783eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
207979bdfe76SSatish Balay @*/
2080329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
208179bdfe76SSatish Balay {
208279bdfe76SSatish Balay   Mat          B;
208379bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2084f1af5d2fSBarry Smith   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2085f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
208679bdfe76SSatish Balay 
2087d64ed03dSBarry Smith   PetscFunctionBegin;
2088962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2089962fb4a0SBarry Smith 
209029bbc08cSBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
209129bbc08cSBarry Smith   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz);
209229bbc08cSBarry Smith   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz);
20934fdb0a08SBarry Smith   if (d_nnz) {
209436c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
209529bbc08cSBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
20964fdb0a08SBarry Smith     }
20974fdb0a08SBarry Smith   }
20984fdb0a08SBarry Smith   if (o_nnz) {
209936c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
210029bbc08cSBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
21014fdb0a08SBarry Smith     }
21024fdb0a08SBarry Smith   }
21033914022bSBarry Smith 
2104d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2106494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2107494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
21083914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
21093914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
21103914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
21113a40ed3dSBarry Smith     PetscFunctionReturn(0);
21123914022bSBarry Smith   }
21133914022bSBarry Smith 
211479bdfe76SSatish Balay   *A = 0;
21153f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
211679bdfe76SSatish Balay   PLogObjectCreate(B);
211779bdfe76SSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2118549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2119549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
212090f02eecSBarry Smith   B->mapping    = 0;
212179bdfe76SSatish Balay   B->factor     = 0;
212279bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
212379bdfe76SSatish Balay 
2124e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2125d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2126d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
212779bdfe76SSatish Balay 
21287c922b88SBarry Smith   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
212929bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2130d64ed03dSBarry Smith   }
2131a8c6a408SBarry Smith   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
213229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified");
2133a8c6a408SBarry Smith   }
2134a8c6a408SBarry Smith   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
213529bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either N or n should be specified");
2136a8c6a408SBarry Smith   }
2137cee3aa6bSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2138cee3aa6bSSatish Balay   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
213979bdfe76SSatish Balay 
214079bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
214179bdfe76SSatish Balay     work[0] = m; work[1] = n;
214279bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2143ca161407SBarry Smith     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
214479bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
214579bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
214679bdfe76SSatish Balay   }
214779bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
214879bdfe76SSatish Balay     Mbs = M/bs;
214929bbc08cSBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize");
215079bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
215179bdfe76SSatish Balay     m   = mbs*bs;
215279bdfe76SSatish Balay   }
215379bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
215479bdfe76SSatish Balay     Nbs = N/bs;
215529bbc08cSBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global cols must be divisible by blocksize");
215679bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
215779bdfe76SSatish Balay     n   = nbs*bs;
215879bdfe76SSatish Balay   }
2159a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
216029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows, cols must be divisible by blocksize");
2161a8c6a408SBarry Smith   }
216279bdfe76SSatish Balay 
216379bdfe76SSatish Balay   b->m = m; B->m = m;
216479bdfe76SSatish Balay   b->n = n; B->n = n;
216579bdfe76SSatish Balay   b->N = N; B->N = N;
216679bdfe76SSatish Balay   b->M = M; B->M = M;
216779bdfe76SSatish Balay   b->bs  = bs;
216879bdfe76SSatish Balay   b->bs2 = bs*bs;
216979bdfe76SSatish Balay   b->mbs = mbs;
217079bdfe76SSatish Balay   b->nbs = nbs;
217179bdfe76SSatish Balay   b->Mbs = Mbs;
217279bdfe76SSatish Balay   b->Nbs = Nbs;
217379bdfe76SSatish Balay 
2174c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2175c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
21767e2c5f70SBarry Smith   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
21777e2c5f70SBarry Smith   ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr);
2178c7fcc2eaSBarry Smith 
217979bdfe76SSatish Balay   /* build local table of row and column ownerships */
2180ff2fd236SBarry Smith   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2181ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21820ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2183ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2184ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
218579bdfe76SSatish Balay   b->rowners[0]    = 0;
218679bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
218779bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
218879bdfe76SSatish Balay   }
2189ff2fd236SBarry Smith   for (i=0; i<=b->size; i++) {
2190ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2191ff2fd236SBarry Smith   }
219279bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
219379bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
21944fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
21954fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
21964fa0d573SSatish Balay 
2197ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
219879bdfe76SSatish Balay   b->cowners[0] = 0;
219979bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
220079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
220179bdfe76SSatish Balay   }
220279bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
220379bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
22044fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
22054fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
220679bdfe76SSatish Balay 
2207a07cd24cSSatish Balay 
220879bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2209029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
221079bdfe76SSatish Balay   PLogObjectParent(B,b->A);
221179bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2212029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
221379bdfe76SSatish Balay   PLogObjectParent(B,b->B);
221479bdfe76SSatish Balay 
221579bdfe76SSatish Balay   /* build cache for off array entries formed */
22167e2c5f70SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
22177e2c5f70SBarry Smith   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
22187c922b88SBarry Smith   b->donotstash  = PETSC_FALSE;
22197c922b88SBarry Smith   b->colmap      = PETSC_NULL;
22207c922b88SBarry Smith   b->garray      = PETSC_NULL;
22217c922b88SBarry Smith   b->roworiented = PETSC_TRUE;
222279bdfe76SSatish Balay 
22236fa18ffdSBarry Smith #if defined(PEYSC_USE_MAT_SINGLE)
22246fa18ffdSBarry Smith   /* stuff for MatSetValues_XXX in single precision */
22256fa18ffdSBarry Smith   b->lensetvalues     = 0;
22266fa18ffdSBarry Smith   b->setvaluescopy    = PETSC_NULL;
22276fa18ffdSBarry Smith #endif
22286fa18ffdSBarry Smith 
222930793edcSSatish Balay   /* stuff used in block assembly */
223030793edcSSatish Balay   b->barray       = 0;
223130793edcSSatish Balay 
223279bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
223379bdfe76SSatish Balay   b->lvec         = 0;
223479bdfe76SSatish Balay   b->Mvctx        = 0;
223579bdfe76SSatish Balay 
223679bdfe76SSatish Balay   /* stuff for MatGetRow() */
223779bdfe76SSatish Balay   b->rowindices   = 0;
223879bdfe76SSatish Balay   b->rowvalues    = 0;
223979bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
224079bdfe76SSatish Balay 
2241a07cd24cSSatish Balay   /* hash table stuff */
2242a07cd24cSSatish Balay   b->ht           = 0;
2243187ce0cbSSatish Balay   b->hd           = 0;
22440bdbc534SSatish Balay   b->ht_size      = 0;
22457c922b88SBarry Smith   b->ht_flag      = PETSC_FALSE;
224625fdafccSSatish Balay   b->ht_fact      = 0;
2247187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2248187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2249a07cd24cSSatish Balay 
225079bdfe76SSatish Balay   *A = B;
2251133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2252133cdb44SSatish Balay   if (flg) {
2253133cdb44SSatish Balay     double fact = 1.39;
2254133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2255f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2256133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2257133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2258133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2259133cdb44SSatish Balay   }
2260f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
22617fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
22620c97f09dSSatish Balay                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2263f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
22647fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
22650c97f09dSSatish Balay                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2266f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
22675ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
22680c97f09dSSatish Balay                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
22693a40ed3dSBarry Smith   PetscFunctionReturn(0);
227079bdfe76SSatish Balay }
2271026e39d0SSatish Balay 
22725615d1e5SSatish Balay #undef __FUNC__
2273b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
22742e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
22750ac07820SSatish Balay {
22760ac07820SSatish Balay   Mat         mat;
22770ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2278f1af5d2fSBarry Smith   int         ierr,len=0;
2279f1af5d2fSBarry Smith   PetscTruth  flg;
22800ac07820SSatish Balay 
2281d64ed03dSBarry Smith   PetscFunctionBegin;
22820ac07820SSatish Balay   *newmat       = 0;
22833f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
22840ac07820SSatish Balay   PLogObjectCreate(mat);
22850ac07820SSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2286549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
22870ac07820SSatish Balay   mat->factor       = matin->factor;
22880ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2289aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
22900ac07820SSatish Balay 
22910ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
22920ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
22930ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
22940ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
22950ac07820SSatish Balay 
22960ac07820SSatish Balay   a->bs  = oldmat->bs;
22970ac07820SSatish Balay   a->bs2 = oldmat->bs2;
22980ac07820SSatish Balay   a->mbs = oldmat->mbs;
22990ac07820SSatish Balay   a->nbs = oldmat->nbs;
23000ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
23010ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
23020ac07820SSatish Balay 
23030ac07820SSatish Balay   a->rstart       = oldmat->rstart;
23040ac07820SSatish Balay   a->rend         = oldmat->rend;
23050ac07820SSatish Balay   a->cstart       = oldmat->cstart;
23060ac07820SSatish Balay   a->cend         = oldmat->cend;
23070ac07820SSatish Balay   a->size         = oldmat->size;
23080ac07820SSatish Balay   a->rank         = oldmat->rank;
2309aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2310aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2311aef5e8e0SSatish Balay   a->rowindices   = 0;
23120ac07820SSatish Balay   a->rowvalues    = 0;
23130ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
231430793edcSSatish Balay   a->barray       = 0;
23153123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
23163123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
23173123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
23183123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
23190ac07820SSatish Balay 
2320133cdb44SSatish Balay   /* hash table stuff */
2321133cdb44SSatish Balay   a->ht           = 0;
2322133cdb44SSatish Balay   a->hd           = 0;
2323133cdb44SSatish Balay   a->ht_size      = 0;
2324133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
232525fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2326133cdb44SSatish Balay   a->ht_total_ct  = 0;
2327133cdb44SSatish Balay   a->ht_insert_ct = 0;
2328133cdb44SSatish Balay 
2329133cdb44SSatish Balay 
2330ff2fd236SBarry Smith   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2331ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
23320ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2333ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2334549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
23358798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
23368798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
23370ac07820SSatish Balay   if (oldmat->colmap) {
2338aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
23390f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
234048e59246SSatish Balay #else
23410ac07820SSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
23420ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2343549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
234448e59246SSatish Balay #endif
23450ac07820SSatish Balay   } else a->colmap = 0;
23460ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
23470ac07820SSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
23480ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2349549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
23500ac07820SSatish Balay   } else a->garray = 0;
23510ac07820SSatish Balay 
23520ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
23530ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
23540ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2355e18c124aSSatish Balay 
23560ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
23572e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
23580ac07820SSatish Balay   PLogObjectParent(mat,a->A);
23592e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
23600ac07820SSatish Balay   PLogObjectParent(mat,a->B);
23610ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2362e03a110bSBarry Smith   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
23630ac07820SSatish Balay   if (flg) {
23640ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
23650ac07820SSatish Balay   }
23660ac07820SSatish Balay   *newmat = mat;
23673a40ed3dSBarry Smith   PetscFunctionReturn(0);
23680ac07820SSatish Balay }
236957b952d6SSatish Balay 
2370e090d566SSatish Balay #include "petscsys.h"
237157b952d6SSatish Balay 
23725615d1e5SSatish Balay #undef __FUNC__
2373b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
237457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
237557b952d6SSatish Balay {
237657b952d6SSatish Balay   Mat          A;
237757b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
237857b952d6SSatish Balay   Scalar       *vals,*buf;
237957b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
238057b952d6SSatish Balay   MPI_Status   status;
2381cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
238257b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2383f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
238457b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
238557b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
238657b952d6SSatish Balay 
2387d64ed03dSBarry Smith   PetscFunctionBegin;
2388f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
238957b952d6SSatish Balay 
2390d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2391d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
239257b952d6SSatish Balay   if (!rank) {
239357b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2394e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
239529bbc08cSBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2396d64ed03dSBarry Smith     if (header[3] < 0) {
239729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2398d64ed03dSBarry Smith     }
23996c5fab8fSBarry Smith   }
2400d64ed03dSBarry Smith 
2401ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
240257b952d6SSatish Balay   M = header[1]; N = header[2];
240357b952d6SSatish Balay 
240429bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
240557b952d6SSatish Balay 
240657b952d6SSatish Balay   /*
240757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
240857b952d6SSatish Balay      divisible by the blocksize
240957b952d6SSatish Balay   */
241057b952d6SSatish Balay   Mbs        = M/bs;
241157b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
241257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
241357b952d6SSatish Balay   else                  Mbs++;
241457b952d6SSatish Balay   if (extra_rows &&!rank) {
2415b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
241657b952d6SSatish Balay   }
2417537820f0SBarry Smith 
241857b952d6SSatish Balay   /* determine ownership of all rows */
241957b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
242057b952d6SSatish Balay   m   = mbs * bs;
2421cee3aa6bSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2422cee3aa6bSSatish Balay   browners = rowners + size + 1;
2423ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
242457b952d6SSatish Balay   rowners[0] = 0;
2425cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2426cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
242757b952d6SSatish Balay   rstart = rowners[rank];
242857b952d6SSatish Balay   rend   = rowners[rank+1];
242957b952d6SSatish Balay 
243057b952d6SSatish Balay   /* distribute row lengths to all processors */
243157b952d6SSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
243257b952d6SSatish Balay   if (!rank) {
243357b952d6SSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2434e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
243557b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
243657b952d6SSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2437cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2438ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2439606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2440d64ed03dSBarry Smith   } else {
2441ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
244257b952d6SSatish Balay   }
244357b952d6SSatish Balay 
244457b952d6SSatish Balay   if (!rank) {
244557b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
244657b952d6SSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2447549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
244857b952d6SSatish Balay     for (i=0; i<size; i++) {
244957b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
245057b952d6SSatish Balay         procsnz[i] += rowlengths[j];
245157b952d6SSatish Balay       }
245257b952d6SSatish Balay     }
2453606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
245457b952d6SSatish Balay 
245557b952d6SSatish Balay     /* determine max buffer needed and allocate it */
245657b952d6SSatish Balay     maxnz = 0;
245757b952d6SSatish Balay     for (i=0; i<size; i++) {
245857b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
245957b952d6SSatish Balay     }
246057b952d6SSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
246157b952d6SSatish Balay 
246257b952d6SSatish Balay     /* read in my part of the matrix column indices  */
246357b952d6SSatish Balay     nz = procsnz[0];
246457b952d6SSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
246557b952d6SSatish Balay     mycols = ibuf;
2466cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2467e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2468cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2469cee3aa6bSSatish Balay 
247057b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
247157b952d6SSatish Balay     for (i=1; i<size-1; i++) {
247257b952d6SSatish Balay       nz   = procsnz[i];
2473e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2474ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
247557b952d6SSatish Balay     }
247657b952d6SSatish Balay     /* read in the stuff for the last proc */
247757b952d6SSatish Balay     if (size != 1) {
247857b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2479e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
248057b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2481ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
248257b952d6SSatish Balay     }
2483606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2484d64ed03dSBarry Smith   } else {
248557b952d6SSatish Balay     /* determine buffer space needed for message */
248657b952d6SSatish Balay     nz = 0;
248757b952d6SSatish Balay     for (i=0; i<m; i++) {
248857b952d6SSatish Balay       nz += locrowlens[i];
248957b952d6SSatish Balay     }
249057b952d6SSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
249157b952d6SSatish Balay     mycols = ibuf;
249257b952d6SSatish Balay     /* receive message of column indices*/
2493ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2494ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
249529bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
249657b952d6SSatish Balay   }
249757b952d6SSatish Balay 
249857b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2499cee3aa6bSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2500cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
250157b952d6SSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2502549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
250357b952d6SSatish Balay   masked1 = mask    + Mbs;
250457b952d6SSatish Balay   masked2 = masked1 + Mbs;
250557b952d6SSatish Balay   rowcount = 0; nzcount = 0;
250657b952d6SSatish Balay   for (i=0; i<mbs; i++) {
250757b952d6SSatish Balay     dcount  = 0;
250857b952d6SSatish Balay     odcount = 0;
250957b952d6SSatish Balay     for (j=0; j<bs; j++) {
251057b952d6SSatish Balay       kmax = locrowlens[rowcount];
251157b952d6SSatish Balay       for (k=0; k<kmax; k++) {
251257b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
251357b952d6SSatish Balay         if (!mask[tmp]) {
251457b952d6SSatish Balay           mask[tmp] = 1;
251557b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
251657b952d6SSatish Balay           else masked1[dcount++] = tmp;
251757b952d6SSatish Balay         }
251857b952d6SSatish Balay       }
251957b952d6SSatish Balay       rowcount++;
252057b952d6SSatish Balay     }
2521cee3aa6bSSatish Balay 
252257b952d6SSatish Balay     dlens[i]  = dcount;
252357b952d6SSatish Balay     odlens[i] = odcount;
2524cee3aa6bSSatish Balay 
252557b952d6SSatish Balay     /* zero out the mask elements we set */
252657b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
252757b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
252857b952d6SSatish Balay   }
2529cee3aa6bSSatish Balay 
253057b952d6SSatish Balay   /* create our matrix */
2531549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
253257b952d6SSatish Balay   A = *newmat;
25336d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
253457b952d6SSatish Balay 
253557b952d6SSatish Balay   if (!rank) {
253657b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
253757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
253857b952d6SSatish Balay     nz = procsnz[0];
253957b952d6SSatish Balay     vals = buf;
2540cee3aa6bSSatish Balay     mycols = ibuf;
2541cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2542e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2543cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2544537820f0SBarry Smith 
254557b952d6SSatish Balay     /* insert into matrix */
254657b952d6SSatish Balay     jj      = rstart*bs;
254757b952d6SSatish Balay     for (i=0; i<m; i++) {
2548b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
254957b952d6SSatish Balay       mycols += locrowlens[i];
255057b952d6SSatish Balay       vals   += locrowlens[i];
255157b952d6SSatish Balay       jj++;
255257b952d6SSatish Balay     }
255357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
255457b952d6SSatish Balay     for (i=1; i<size-1; i++) {
255557b952d6SSatish Balay       nz   = procsnz[i];
255657b952d6SSatish Balay       vals = buf;
2557e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2558ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
255957b952d6SSatish Balay     }
256057b952d6SSatish Balay     /* the last proc */
256157b952d6SSatish Balay     if (size != 1){
256257b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2563cee3aa6bSSatish Balay       vals = buf;
2564e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
256557b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2566ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
256757b952d6SSatish Balay     }
2568606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2569d64ed03dSBarry Smith   } else {
257057b952d6SSatish Balay     /* receive numeric values */
257157b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
257257b952d6SSatish Balay 
257357b952d6SSatish Balay     /* receive message of values*/
257457b952d6SSatish Balay     vals   = buf;
2575cee3aa6bSSatish Balay     mycols = ibuf;
2576ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2577ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
257829bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
257957b952d6SSatish Balay 
258057b952d6SSatish Balay     /* insert into matrix */
258157b952d6SSatish Balay     jj      = rstart*bs;
2582cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2583b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
258457b952d6SSatish Balay       mycols += locrowlens[i];
258557b952d6SSatish Balay       vals   += locrowlens[i];
258657b952d6SSatish Balay       jj++;
258757b952d6SSatish Balay     }
258857b952d6SSatish Balay   }
2589606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2590606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2591606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2592606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2593606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2594606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
25956d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25966d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25973a40ed3dSBarry Smith   PetscFunctionReturn(0);
259857b952d6SSatish Balay }
259957b952d6SSatish Balay 
2600133cdb44SSatish Balay #undef __FUNC__
2601b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2602133cdb44SSatish Balay /*@
2603133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2604133cdb44SSatish Balay 
2605133cdb44SSatish Balay    Input Parameters:
2606133cdb44SSatish Balay .  mat  - the matrix
2607133cdb44SSatish Balay .  fact - factor
2608133cdb44SSatish Balay 
2609fee21e36SBarry Smith    Collective on Mat
2610fee21e36SBarry Smith 
26118c890885SBarry Smith    Level: advanced
26128c890885SBarry Smith 
2613133cdb44SSatish Balay   Notes:
2614133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2615133cdb44SSatish Balay 
2616133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2617133cdb44SSatish Balay 
2618133cdb44SSatish Balay .seealso: MatSetOption()
2619133cdb44SSatish Balay @*/
2620329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2621133cdb44SSatish Balay {
262225fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2623133cdb44SSatish Balay 
2624133cdb44SSatish Balay   PetscFunctionBegin;
2625133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
262625fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
262729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2628133cdb44SSatish Balay   }
2629133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2630133cdb44SSatish Balay   baij->ht_fact = fact;
2631133cdb44SSatish Balay   PetscFunctionReturn(0);
2632133cdb44SSatish Balay }
2633