xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 865e5f61f906011a1e8ca16f59ec824bec4a567e)
179bdfe76SSatish Balay 
2e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
379bdfe76SSatish Balay 
4dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6dfbe8321SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12dfbe8321SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1593fea6afSBarry Smith 
1693fea6afSBarry Smith /*  UGLY, ugly, ugly
1787828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1893fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
1993fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2093fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2193fea6afSBarry Smith    into the single precision data structures.
2293fea6afSBarry Smith */
2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
24dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
25dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2993fea6afSBarry Smith #else
306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3593fea6afSBarry Smith #endif
363b2fbd54SBarry Smith 
374a2ae208SSatish Balay #undef __FUNCT__
384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
407843d17aSBarry Smith {
417843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43dfbe8321SBarry Smith   int i;
4487828ca2SBarry Smith   PetscScalar  *va,*vb;
457843d17aSBarry Smith   Vec          vtmp;
467843d17aSBarry Smith 
477843d17aSBarry Smith   PetscFunctionBegin;
487843d17aSBarry Smith 
497843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
501ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
517843d17aSBarry Smith 
52ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
537843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
541ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
557843d17aSBarry Smith 
56273d9f13SBarry Smith   for (i=0; i<A->m; i++){
57273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
587843d17aSBarry Smith   }
597843d17aSBarry Smith 
601ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
611ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
637843d17aSBarry Smith 
647843d17aSBarry Smith   PetscFunctionReturn(0);
657843d17aSBarry Smith }
667843d17aSBarry Smith 
677fc3c18eSBarry Smith EXTERN_C_BEGIN
684a2ae208SSatish Balay #undef __FUNCT__
694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70dfbe8321SBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
717fc3c18eSBarry Smith {
727fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73dfbe8321SBarry Smith   PetscErrorCode ierr;
747fc3c18eSBarry Smith 
757fc3c18eSBarry Smith   PetscFunctionBegin;
767fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
787fc3c18eSBarry Smith   PetscFunctionReturn(0);
797fc3c18eSBarry Smith }
807fc3c18eSBarry Smith EXTERN_C_END
817fc3c18eSBarry Smith 
827fc3c18eSBarry Smith EXTERN_C_BEGIN
834a2ae208SSatish Balay #undef __FUNCT__
844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
867fc3c18eSBarry Smith {
877fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88dfbe8321SBarry Smith   PetscErrorCode ierr;
897fc3c18eSBarry Smith 
907fc3c18eSBarry Smith   PetscFunctionBegin;
917fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
937fc3c18eSBarry Smith   PetscFunctionReturn(0);
947fc3c18eSBarry Smith }
957fc3c18eSBarry Smith EXTERN_C_END
967fc3c18eSBarry Smith 
97537820f0SBarry Smith /*
98537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
9957b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10057b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10157b952d6SSatish Balay    length of colmap equals the global matrix length.
10257b952d6SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
10657b952d6SSatish Balay {
10757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10857b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
1096849ba73SBarry Smith   PetscErrorCode ierr;
1106849ba73SBarry Smith   int         nbs = B->nbs,i,bs=B->bs;
11157b952d6SSatish Balay 
112d64ed03dSBarry Smith   PetscFunctionBegin;
113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
114f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11548e59246SSatish Balay   for (i=0; i<nbs; i++){
1160f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11748e59246SSatish Balay   }
11848e59246SSatish Balay #else
11982502324SSatish Balay   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
120b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12348e59246SSatish Balay #endif
1243a40ed3dSBarry Smith   PetscFunctionReturn(0);
12557b952d6SSatish Balay }
12657b952d6SSatish Balay 
12780c1aa95SSatish Balay #define CHUNKSIZE  10
12880c1aa95SSatish Balay 
129f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13080c1aa95SSatish Balay { \
13180c1aa95SSatish Balay  \
13280c1aa95SSatish Balay     brow = row/bs;  \
13380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13580c1aa95SSatish Balay       bcol = col/bs; \
13680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
137ab26458aSBarry Smith       low = 0; high = nrow; \
138ab26458aSBarry Smith       while (high-low > 3) { \
139ab26458aSBarry Smith         t = (low+high)/2; \
140ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
141ab26458aSBarry Smith         else              low  = t; \
142ab26458aSBarry Smith       } \
143ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
148eada6651SSatish Balay           else                    *bap  = value;  \
149ac7a638eSSatish Balay           goto a_noinsert; \
15080c1aa95SSatish Balay         } \
15180c1aa95SSatish Balay       } \
15289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
153a45adfd6SMatthew Knepley       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
15480c1aa95SSatish Balay       if (nrow >= rmax) { \
15580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15680c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1573eda8832SBarry Smith         MatScalar *new_a; \
15880c1aa95SSatish Balay  \
159a45adfd6SMatthew Knepley         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
16089280ab3SLois Curfman McInnes  \
16180c1aa95SSatish Balay         /* malloc new storage space */ \
1623eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
16382502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
16480c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16680c1aa95SSatish Balay  \
16780c1aa95SSatish Balay         /* copy over old data into new slots */ \
16880c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16980c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
17180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1723eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1733eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17487828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1763eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17780c1aa95SSatish Balay         /* free up old matrix storage */ \
178606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179606d414cSSatish Balay         if (!a->singlemalloc) { \
180606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182606d414cSSatish Balay         } \
18380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1847c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18580c1aa95SSatish Balay  \
18680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188b0a32e0cSBarry Smith         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19080c1aa95SSatish Balay         a->reallocs++; \
19180c1aa95SSatish Balay         a->nz++; \
19280c1aa95SSatish Balay       } \
19380c1aa95SSatish Balay       N = nrow++ - 1;  \
19480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19580c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1973eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19880c1aa95SSatish Balay       } \
1993eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
202ac7a638eSSatish Balay       a_noinsert:; \
20380c1aa95SSatish Balay     ailen[brow] = nrow; \
20480c1aa95SSatish Balay }
20557b952d6SSatish Balay 
206ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207ac7a638eSSatish Balay { \
208ac7a638eSSatish Balay     brow = row/bs;  \
209ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
211ac7a638eSSatish Balay       bcol = col/bs; \
212ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
213ac7a638eSSatish Balay       low = 0; high = nrow; \
214ac7a638eSSatish Balay       while (high-low > 3) { \
215ac7a638eSSatish Balay         t = (low+high)/2; \
216ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
217ac7a638eSSatish Balay         else              low  = t; \
218ac7a638eSSatish Balay       } \
219ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
220ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
221ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
222ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
224ac7a638eSSatish Balay           else                    *bap  = value;  \
225ac7a638eSSatish Balay           goto b_noinsert; \
226ac7a638eSSatish Balay         } \
227ac7a638eSSatish Balay       } \
22889280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
229a45adfd6SMatthew Knepley       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
230ac7a638eSSatish Balay       if (nrow >= rmax) { \
231ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23274c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2333eda8832SBarry Smith         MatScalar *new_a; \
234ac7a638eSSatish Balay  \
235a45adfd6SMatthew Knepley         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
23689280ab3SLois Curfman McInnes  \
237ac7a638eSSatish Balay         /* malloc new storage space */ \
2383eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
23982502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
241ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
242ac7a638eSSatish Balay  \
243ac7a638eSSatish Balay         /* copy over old data into new slots */ \
244ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24574c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2483eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2523eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253ac7a638eSSatish Balay         /* free up old matrix storage */ \
254606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255606d414cSSatish Balay         if (!b->singlemalloc) { \
256606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258606d414cSSatish Balay         } \
25974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2607c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
261ac7a638eSSatish Balay  \
262ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264b0a32e0cSBarry Smith         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26674c639caSSatish Balay         b->reallocs++; \
26774c639caSSatish Balay         b->nz++; \
268ac7a638eSSatish Balay       } \
269ac7a638eSSatish Balay       N = nrow++ - 1;  \
270ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
271ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
272ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2733eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274ac7a638eSSatish Balay       } \
2753eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
277ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
278ac7a638eSSatish Balay       b_noinsert:; \
279ac7a638eSSatish Balay     bilen[brow] = nrow; \
280ac7a638eSSatish Balay }
281ac7a638eSSatish Balay 
28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2834a2ae208SSatish Balay #undef __FUNCT__
2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
285dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
28657b952d6SSatish Balay {
2876fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288dfbe8321SBarry Smith   PetscErrorCode ierr;
289dfbe8321SBarry Smith   int  i,N = m*n;
2906fa18ffdSBarry Smith   MatScalar   *vsingle;
29193fea6afSBarry Smith 
29293fea6afSBarry Smith   PetscFunctionBegin;
2936fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2946fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2966fa18ffdSBarry Smith     b->setvalueslen  = N;
2976fa18ffdSBarry Smith   }
2986fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2996fa18ffdSBarry Smith 
30093fea6afSBarry Smith   for (i=0; i<N; i++) {
30193fea6afSBarry Smith     vsingle[i] = v[i];
30293fea6afSBarry Smith   }
30393fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30493fea6afSBarry Smith   PetscFunctionReturn(0);
30593fea6afSBarry Smith }
30693fea6afSBarry Smith 
3074a2ae208SSatish Balay #undef __FUNCT__
3084a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
31093fea6afSBarry Smith {
3116fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
312dfbe8321SBarry Smith   PetscErrorCode ierr;
313dfbe8321SBarry Smith   int i,N = m*n*b->bs2;
3146fa18ffdSBarry Smith   MatScalar   *vsingle;
31593fea6afSBarry Smith 
31693fea6afSBarry Smith   PetscFunctionBegin;
3176fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3186fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
31982502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3206fa18ffdSBarry Smith     b->setvalueslen  = N;
3216fa18ffdSBarry Smith   }
3226fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32393fea6afSBarry Smith   for (i=0; i<N; i++) {
32493fea6afSBarry Smith     vsingle[i] = v[i];
32593fea6afSBarry Smith   }
32693fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32793fea6afSBarry Smith   PetscFunctionReturn(0);
32893fea6afSBarry Smith }
3296fa18ffdSBarry Smith 
3304a2ae208SSatish Balay #undef __FUNCT__
3314a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3336fa18ffdSBarry Smith {
3346fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
335dfbe8321SBarry Smith   PetscErrorCode ierr;
336dfbe8321SBarry Smith   int i,N = m*n;
3376fa18ffdSBarry Smith   MatScalar   *vsingle;
3386fa18ffdSBarry Smith 
3396fa18ffdSBarry Smith   PetscFunctionBegin;
3406fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3416fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
34282502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3436fa18ffdSBarry Smith     b->setvalueslen  = N;
3446fa18ffdSBarry Smith   }
3456fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3466fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3476fa18ffdSBarry Smith     vsingle[i] = v[i];
3486fa18ffdSBarry Smith   }
3496fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3506fa18ffdSBarry Smith   PetscFunctionReturn(0);
3516fa18ffdSBarry Smith }
3526fa18ffdSBarry Smith 
3534a2ae208SSatish Balay #undef __FUNCT__
3544a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3566fa18ffdSBarry Smith {
3576fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
358dfbe8321SBarry Smith   PetscErrorCode ierr;
359dfbe8321SBarry Smith   int i,N = m*n*b->bs2;
3606fa18ffdSBarry Smith   MatScalar   *vsingle;
3616fa18ffdSBarry Smith 
3626fa18ffdSBarry Smith   PetscFunctionBegin;
3636fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3646fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
36582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3666fa18ffdSBarry Smith     b->setvalueslen  = N;
3676fa18ffdSBarry Smith   }
3686fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3696fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3706fa18ffdSBarry Smith     vsingle[i] = v[i];
3716fa18ffdSBarry Smith   }
3726fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3736fa18ffdSBarry Smith   PetscFunctionReturn(0);
3746fa18ffdSBarry Smith }
37593fea6afSBarry Smith #endif
37693fea6afSBarry Smith 
3774a2ae208SSatish Balay #undef __FUNCT__
378e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
38093fea6afSBarry Smith {
38157b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
38293fea6afSBarry Smith   MatScalar   value;
383273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
384dfbe8321SBarry Smith   PetscErrorCode ierr;
385dfbe8321SBarry Smith   int i,j,row,col;
386273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
3874fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3884fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38957b952d6SSatish Balay 
390eada6651SSatish Balay   /* Some Variables required in the macro */
39180c1aa95SSatish Balay   Mat         A = baij->A;
39280c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
393ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3943eda8832SBarry Smith   MatScalar   *aa=a->a;
395ac7a638eSSatish Balay 
396ac7a638eSSatish Balay   Mat         B = baij->B;
397ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
398ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3993eda8832SBarry Smith   MatScalar   *ba=b->a;
400ac7a638eSSatish Balay 
401ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
402ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
4033eda8832SBarry Smith   MatScalar   *ap,*bap;
40480c1aa95SSatish Balay 
405d64ed03dSBarry Smith   PetscFunctionBegin;
40657b952d6SSatish Balay   for (i=0; i<m; i++) {
4075ef9f2a5SBarry Smith     if (im[i] < 0) continue;
408aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
409590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
410639f9d9dSBarry Smith #endif
41157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
41257b952d6SSatish Balay       row = im[i] - rstart_orig;
41357b952d6SSatish Balay       for (j=0; j<n; j++) {
41457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
41557b952d6SSatish Balay           col = in[j] - cstart_orig;
41657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41973959e64SBarry Smith         } else if (in[j] < 0) continue;
420aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
421590ac198SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
422639f9d9dSBarry Smith #endif
42357b952d6SSatish Balay         else {
42457b952d6SSatish Balay           if (mat->was_assembled) {
425905e6a2fSBarry Smith             if (!baij->colmap) {
426905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427905e6a2fSBarry Smith             }
428aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4290f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430bba1ac68SSatish Balay             col  = col - 1;
43148e59246SSatish Balay #else
432bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
43348e59246SSatish Balay #endif
43457b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43557b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4368295de27SSatish Balay               col =  in[j];
4379bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4389bf004c3SSatish Balay               B = baij->B;
4399bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4409bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4419bf004c3SSatish Balay               ba=b->a;
442bba1ac68SSatish Balay             } else col += in[j]%bs;
4438295de27SSatish Balay           } else col = in[j];
44457b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
44590da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
44690da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44757b952d6SSatish Balay         }
44857b952d6SSatish Balay       }
449d64ed03dSBarry Smith     } else {
45090f02eecSBarry Smith       if (!baij->donotstash) {
451ff2fd236SBarry Smith         if (roworiented) {
4526fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453ff2fd236SBarry Smith         } else {
4546fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
45557b952d6SSatish Balay         }
45657b952d6SSatish Balay       }
45757b952d6SSatish Balay     }
45890f02eecSBarry Smith   }
4593a40ed3dSBarry Smith   PetscFunctionReturn(0);
46057b952d6SSatish Balay }
46157b952d6SSatish Balay 
4624a2ae208SSatish Balay #undef __FUNCT__
4634a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
464dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
465ab26458aSBarry Smith {
466ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467f15d580aSBarry Smith   const MatScalar *value;
468f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
469273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
470dfbe8321SBarry Smith   PetscErrorCode ierr;
471dfbe8321SBarry Smith   int i,j,ii,jj,row,col,rstart=baij->rstart;
472ab26458aSBarry Smith   int             rend=baij->rend,cstart=baij->cstart,stepval;
473ab26458aSBarry Smith   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
474ab26458aSBarry Smith 
475b16ae2b1SBarry Smith   PetscFunctionBegin;
47630793edcSSatish Balay   if(!barray) {
47782502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47882502324SSatish Balay     baij->barray = barray;
47930793edcSSatish Balay   }
48030793edcSSatish Balay 
481ab26458aSBarry Smith   if (roworiented) {
482ab26458aSBarry Smith     stepval = (n-1)*bs;
483ab26458aSBarry Smith   } else {
484ab26458aSBarry Smith     stepval = (m-1)*bs;
485ab26458aSBarry Smith   }
486ab26458aSBarry Smith   for (i=0; i<m; i++) {
4875ef9f2a5SBarry Smith     if (im[i] < 0) continue;
488aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
489590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
490ab26458aSBarry Smith #endif
491ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
492ab26458aSBarry Smith       row = im[i] - rstart;
493ab26458aSBarry Smith       for (j=0; j<n; j++) {
49415b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
49515b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
496f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
49715b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
498f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
49915b57d14SSatish Balay         } else { /* Here a copy is required */
500ab26458aSBarry Smith           if (roworiented) {
501ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
502ab26458aSBarry Smith           } else {
503ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
504abef11f7SSatish Balay           }
50547513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
50647513183SBarry Smith             for (jj=0; jj<bs; jj++) {
50730793edcSSatish Balay               *barray++  = *value++;
50847513183SBarry Smith             }
50947513183SBarry Smith           }
51030793edcSSatish Balay           barray -=bs2;
51115b57d14SSatish Balay         }
512abef11f7SSatish Balay 
513abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
514abef11f7SSatish Balay           col  = in[j] - cstart;
51593fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516ab26458aSBarry Smith         }
5175ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
518aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
519590ac198SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
520ab26458aSBarry Smith #endif
521ab26458aSBarry Smith         else {
522ab26458aSBarry Smith           if (mat->was_assembled) {
523ab26458aSBarry Smith             if (!baij->colmap) {
524ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525ab26458aSBarry Smith             }
526a5eb4965SSatish Balay 
527aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
528aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
529fa46199cSSatish Balay             { int data;
5300f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
53129bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532fa46199cSSatish Balay             }
53348e59246SSatish Balay #else
53429bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535a5eb4965SSatish Balay #endif
53648e59246SSatish Balay #endif
537aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5380f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539fa46199cSSatish Balay             col  = (col - 1)/bs;
54048e59246SSatish Balay #else
541a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
54248e59246SSatish Balay #endif
543ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545ab26458aSBarry Smith               col =  in[j];
546ab26458aSBarry Smith             }
547ab26458aSBarry Smith           }
548ab26458aSBarry Smith           else col = in[j];
54993fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550ab26458aSBarry Smith         }
551ab26458aSBarry Smith       }
552d64ed03dSBarry Smith     } else {
553ab26458aSBarry Smith       if (!baij->donotstash) {
554ff2fd236SBarry Smith         if (roworiented) {
5556fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556ff2fd236SBarry Smith         } else {
5576fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558ff2fd236SBarry Smith         }
559abef11f7SSatish Balay       }
560ab26458aSBarry Smith     }
561ab26458aSBarry Smith   }
5623a40ed3dSBarry Smith   PetscFunctionReturn(0);
563ab26458aSBarry Smith }
5646fa18ffdSBarry Smith 
5650bdbc534SSatish Balay #define HASH_KEY 0.6180339887
566c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5676fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
568c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5694a2ae208SSatish Balay #undef __FUNCT__
5704a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571dfbe8321SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
5720bdbc534SSatish Balay {
5730bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
574273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
575dfbe8321SBarry Smith   PetscErrorCode ierr;
576dfbe8321SBarry Smith   int i,j,row,col;
577273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
578c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
580329f5518SBarry Smith   PetscReal   tmp;
5813eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
582aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5834a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5844a15367fSSatish Balay #endif
5850bdbc534SSatish Balay 
5860bdbc534SSatish Balay   PetscFunctionBegin;
5870bdbc534SSatish Balay 
5880bdbc534SSatish Balay   for (i=0; i<m; i++) {
589aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
59029bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
5920bdbc534SSatish Balay #endif
5930bdbc534SSatish Balay       row = im[i];
594c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5950bdbc534SSatish Balay       for (j=0; j<n; j++) {
5960bdbc534SSatish Balay         col = in[j];
5976fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5980bdbc534SSatish Balay         /* Look up into the Hash Table */
599c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
600c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6010bdbc534SSatish Balay 
602c2760754SSatish Balay 
603c2760754SSatish Balay         idx = h1;
604aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
605187ce0cbSSatish Balay         insert_ct++;
606187ce0cbSSatish Balay         total_ct++;
607187ce0cbSSatish Balay         if (HT[idx] != key) {
608187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609187ce0cbSSatish Balay           if (idx == size) {
610187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611187ce0cbSSatish Balay             if (idx == h1) {
612a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
613187ce0cbSSatish Balay             }
614187ce0cbSSatish Balay           }
615187ce0cbSSatish Balay         }
616187ce0cbSSatish Balay #else
617c2760754SSatish Balay         if (HT[idx] != key) {
618c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619c2760754SSatish Balay           if (idx == size) {
620c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621c2760754SSatish Balay             if (idx == h1) {
622a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
623c2760754SSatish Balay             }
624c2760754SSatish Balay           }
625c2760754SSatish Balay         }
626187ce0cbSSatish Balay #endif
627c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
628c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6300bdbc534SSatish Balay       }
6310bdbc534SSatish Balay     } else {
6320bdbc534SSatish Balay       if (!baij->donotstash) {
633ff2fd236SBarry Smith         if (roworiented) {
6348798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635ff2fd236SBarry Smith         } else {
6368798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6370bdbc534SSatish Balay         }
6380bdbc534SSatish Balay       }
6390bdbc534SSatish Balay     }
6400bdbc534SSatish Balay   }
641aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
642187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
643187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
644187ce0cbSSatish Balay #endif
6450bdbc534SSatish Balay   PetscFunctionReturn(0);
6460bdbc534SSatish Balay }
6470bdbc534SSatish Balay 
6484a2ae208SSatish Balay #undef __FUNCT__
6494a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
650dfbe8321SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
6510bdbc534SSatish Balay {
6520bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
654dfbe8321SBarry Smith   PetscErrorCode ierr;
655dfbe8321SBarry Smith   int i,j,ii,jj,row,col;
656273d9f13SBarry Smith   int             rstart=baij->rstart ;
657b4cc0f5aSSatish Balay   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
658c2760754SSatish Balay   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659329f5518SBarry Smith   PetscReal       tmp;
6603eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
661f15d580aSBarry Smith   const MatScalar *v_t,*value;
662aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6634a15367fSSatish Balay   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6644a15367fSSatish Balay #endif
6650bdbc534SSatish Balay 
666d0a41580SSatish Balay   PetscFunctionBegin;
667d0a41580SSatish Balay 
6680bdbc534SSatish Balay   if (roworiented) {
6690bdbc534SSatish Balay     stepval = (n-1)*bs;
6700bdbc534SSatish Balay   } else {
6710bdbc534SSatish Balay     stepval = (m-1)*bs;
6720bdbc534SSatish Balay   }
6730bdbc534SSatish Balay   for (i=0; i<m; i++) {
674aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
675590ac198SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
676590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
6770bdbc534SSatish Balay #endif
6780bdbc534SSatish Balay     row   = im[i];
679187ce0cbSSatish Balay     v_t   = v + i*bs2;
680c2760754SSatish Balay     if (row >= rstart && row < rend) {
6810bdbc534SSatish Balay       for (j=0; j<n; j++) {
6820bdbc534SSatish Balay         col = in[j];
6830bdbc534SSatish Balay 
6840bdbc534SSatish Balay         /* Look up into the Hash Table */
685c2760754SSatish Balay         key = row*Nbs+col+1;
686c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6870bdbc534SSatish Balay 
688c2760754SSatish Balay         idx = h1;
689aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
690187ce0cbSSatish Balay         total_ct++;
691187ce0cbSSatish Balay         insert_ct++;
692187ce0cbSSatish Balay        if (HT[idx] != key) {
693187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694187ce0cbSSatish Balay           if (idx == size) {
695187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696187ce0cbSSatish Balay             if (idx == h1) {
697a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
698187ce0cbSSatish Balay             }
699187ce0cbSSatish Balay           }
700187ce0cbSSatish Balay         }
701187ce0cbSSatish Balay #else
702c2760754SSatish Balay         if (HT[idx] != key) {
703c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704c2760754SSatish Balay           if (idx == size) {
705c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706c2760754SSatish Balay             if (idx == h1) {
707a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
708c2760754SSatish Balay             }
709c2760754SSatish Balay           }
710c2760754SSatish Balay         }
711187ce0cbSSatish Balay #endif
712c2760754SSatish Balay         baij_a = HD[idx];
7130bdbc534SSatish Balay         if (roworiented) {
714c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
716187ce0cbSSatish Balay           value = v_t;
717187ce0cbSSatish Balay           v_t  += bs;
718fef45726SSatish Balay           if (addv == ADD_VALUES) {
719c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
720c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
721fef45726SSatish Balay                 baij_a[jj]  += *value++;
722b4cc0f5aSSatish Balay               }
723b4cc0f5aSSatish Balay             }
724fef45726SSatish Balay           } else {
725c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
726c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
727fef45726SSatish Balay                 baij_a[jj]  = *value++;
728fef45726SSatish Balay               }
729fef45726SSatish Balay             }
730fef45726SSatish Balay           }
7310bdbc534SSatish Balay         } else {
7320bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
733fef45726SSatish Balay           if (addv == ADD_VALUES) {
734b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7350bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
736fef45726SSatish Balay                 baij_a[jj]  += *value++;
737fef45726SSatish Balay               }
738fef45726SSatish Balay             }
739fef45726SSatish Balay           } else {
740fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
742fef45726SSatish Balay                 baij_a[jj]  = *value++;
743fef45726SSatish Balay               }
744b4cc0f5aSSatish Balay             }
7450bdbc534SSatish Balay           }
7460bdbc534SSatish Balay         }
7470bdbc534SSatish Balay       }
7480bdbc534SSatish Balay     } else {
7490bdbc534SSatish Balay       if (!baij->donotstash) {
7500bdbc534SSatish Balay         if (roworiented) {
7518798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7520bdbc534SSatish Balay         } else {
7538798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7540bdbc534SSatish Balay         }
7550bdbc534SSatish Balay       }
7560bdbc534SSatish Balay     }
7570bdbc534SSatish Balay   }
758aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
759187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
760187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
761187ce0cbSSatish Balay #endif
7620bdbc534SSatish Balay   PetscFunctionReturn(0);
7630bdbc534SSatish Balay }
764133cdb44SSatish Balay 
7654a2ae208SSatish Balay #undef __FUNCT__
7664a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
767dfbe8321SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
768d6de1c52SSatish Balay {
769d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
7706849ba73SBarry Smith   PetscErrorCode ierr;
7716849ba73SBarry Smith   int        bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
77248e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
773d6de1c52SSatish Balay 
774133cdb44SSatish Balay   PetscFunctionBegin;
775d6de1c52SSatish Balay   for (i=0; i<m; i++) {
776590ac198SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
777590ac198SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
778d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
779d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
780d6de1c52SSatish Balay       for (j=0; j<n; j++) {
781590ac198SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
782590ac198SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
783d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
784d6de1c52SSatish Balay           col = idxn[j] - bscstart;
78598dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786d64ed03dSBarry Smith         } else {
787905e6a2fSBarry Smith           if (!baij->colmap) {
788905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
789905e6a2fSBarry Smith           }
790aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7910f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
792fa46199cSSatish Balay           data --;
79348e59246SSatish Balay #else
79448e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
79548e59246SSatish Balay #endif
79648e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
797d9d09a02SSatish Balay           else {
79848e59246SSatish Balay             col  = data + idxn[j]%bs;
79998dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
800d6de1c52SSatish Balay           }
801d6de1c52SSatish Balay         }
802d6de1c52SSatish Balay       }
803d64ed03dSBarry Smith     } else {
80429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
805d6de1c52SSatish Balay     }
806d6de1c52SSatish Balay   }
8073a40ed3dSBarry Smith  PetscFunctionReturn(0);
808d6de1c52SSatish Balay }
809d6de1c52SSatish Balay 
8104a2ae208SSatish Balay #undef __FUNCT__
8114a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
812dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813d6de1c52SSatish Balay {
814d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
815d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816dfbe8321SBarry Smith   PetscErrorCode ierr;
817dfbe8321SBarry Smith   int i,bs2=baij->bs2;
818329f5518SBarry Smith   PetscReal  sum = 0.0;
8193eda8832SBarry Smith   MatScalar  *v;
820d6de1c52SSatish Balay 
821d64ed03dSBarry Smith   PetscFunctionBegin;
822d6de1c52SSatish Balay   if (baij->size == 1) {
823064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824d6de1c52SSatish Balay   } else {
825d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
826d6de1c52SSatish Balay       v = amat->a;
827d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
828aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
829329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830d6de1c52SSatish Balay #else
831d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
832d6de1c52SSatish Balay #endif
833d6de1c52SSatish Balay       }
834d6de1c52SSatish Balay       v = bmat->a;
835d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
836aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
837329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838d6de1c52SSatish Balay #else
839d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
840d6de1c52SSatish Balay #endif
841d6de1c52SSatish Balay       }
842064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843064f8208SBarry Smith       *nrm = sqrt(*nrm);
844d64ed03dSBarry Smith     } else {
84529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846d6de1c52SSatish Balay     }
847d64ed03dSBarry Smith   }
8483a40ed3dSBarry Smith   PetscFunctionReturn(0);
849d6de1c52SSatish Balay }
85057b952d6SSatish Balay 
851bd7f49f5SSatish Balay 
852fef45726SSatish Balay /*
853fef45726SSatish Balay   Creates the hash table, and sets the table
854fef45726SSatish Balay   This table is created only once.
855fef45726SSatish Balay   If new entried need to be added to the matrix
856fef45726SSatish Balay   then the hash table has to be destroyed and
857fef45726SSatish Balay   recreated.
858fef45726SSatish Balay */
8594a2ae208SSatish Balay #undef __FUNCT__
8604a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862596b8d2eSBarry Smith {
863596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
864596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
865596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8660bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
8676849ba73SBarry Smith   PetscErrorCode ierr;
8686849ba73SBarry Smith   int         size,bs2=baij->bs2,rstart=baij->rstart;
869187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870fef45726SSatish Balay   int         *HT,key;
8713eda8832SBarry Smith   MatScalar   **HD;
872329f5518SBarry Smith   PetscReal   tmp;
873aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8744a15367fSSatish Balay   int         ct=0,max=0;
8754a15367fSSatish Balay #endif
876fef45726SSatish Balay 
877d64ed03dSBarry Smith   PetscFunctionBegin;
8780bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8790bdbc534SSatish Balay   size = baij->ht_size;
880fef45726SSatish Balay 
8810bdbc534SSatish Balay   if (baij->ht) {
8820bdbc534SSatish Balay     PetscFunctionReturn(0);
883596b8d2eSBarry Smith   }
8840bdbc534SSatish Balay 
885fef45726SSatish Balay   /* Allocate Memory for Hash Table */
886b0a32e0cSBarry Smith   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
888b9e4cc15SSatish Balay   HD       = baij->hd;
889a07cd24cSSatish Balay   HT       = baij->ht;
890b9e4cc15SSatish Balay 
891b9e4cc15SSatish Balay 
89287828ca2SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8930bdbc534SSatish Balay 
894596b8d2eSBarry Smith 
895596b8d2eSBarry Smith   /* Loop Over A */
8960bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
897596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8980bdbc534SSatish Balay       row = i+rstart;
8990bdbc534SSatish Balay       col = aj[j]+cstart;
900596b8d2eSBarry Smith 
901187ce0cbSSatish Balay       key = row*Nbs + col + 1;
902c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9030bdbc534SSatish Balay       for (k=0; k<size; k++){
904958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9050bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9060bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
907596b8d2eSBarry Smith           break;
908aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
909187ce0cbSSatish Balay         } else {
910187ce0cbSSatish Balay           ct++;
911187ce0cbSSatish Balay #endif
912596b8d2eSBarry Smith         }
913187ce0cbSSatish Balay       }
914aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
915187ce0cbSSatish Balay       if (k> max) max = k;
916187ce0cbSSatish Balay #endif
917596b8d2eSBarry Smith     }
918596b8d2eSBarry Smith   }
919596b8d2eSBarry Smith   /* Loop Over B */
9200bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
921596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9220bdbc534SSatish Balay       row = i+rstart;
9230bdbc534SSatish Balay       col = garray[bj[j]];
924187ce0cbSSatish Balay       key = row*Nbs + col + 1;
925c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9260bdbc534SSatish Balay       for (k=0; k<size; k++){
927958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9280bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9290bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
930596b8d2eSBarry Smith           break;
931aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
932187ce0cbSSatish Balay         } else {
933187ce0cbSSatish Balay           ct++;
934187ce0cbSSatish Balay #endif
935596b8d2eSBarry Smith         }
936187ce0cbSSatish Balay       }
937aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
938187ce0cbSSatish Balay       if (k> max) max = k;
939187ce0cbSSatish Balay #endif
940596b8d2eSBarry Smith     }
941596b8d2eSBarry Smith   }
942596b8d2eSBarry Smith 
943596b8d2eSBarry Smith   /* Print Summary */
944aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
945c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
946596b8d2eSBarry Smith     if (HT[i]) {j++;}
947c38d4ed2SBarry Smith   }
948958c9bccSBarry Smith   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949187ce0cbSSatish Balay #endif
9503a40ed3dSBarry Smith   PetscFunctionReturn(0);
951596b8d2eSBarry Smith }
95257b952d6SSatish Balay 
9534a2ae208SSatish Balay #undef __FUNCT__
9544a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
955dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
956bbb85fb3SSatish Balay {
957bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
958dfbe8321SBarry Smith   PetscErrorCode ierr;
959dfbe8321SBarry Smith   int nstash,reallocs;
960bbb85fb3SSatish Balay   InsertMode  addv;
961bbb85fb3SSatish Balay 
962bbb85fb3SSatish Balay   PetscFunctionBegin;
963bbb85fb3SSatish Balay   if (baij->donotstash) {
964bbb85fb3SSatish Balay     PetscFunctionReturn(0);
965bbb85fb3SSatish Balay   }
966bbb85fb3SSatish Balay 
967bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
968bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
969bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
97029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
971bbb85fb3SSatish Balay   }
972bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
973bbb85fb3SSatish Balay 
9748798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9758798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9768798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
977b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
97846680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
979b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
980bbb85fb3SSatish Balay   PetscFunctionReturn(0);
981bbb85fb3SSatish Balay }
982bbb85fb3SSatish Balay 
9834a2ae208SSatish Balay #undef __FUNCT__
9844a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
985dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
986bbb85fb3SSatish Balay {
987bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
988a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
9896849ba73SBarry Smith   PetscErrorCode ierr;
9906849ba73SBarry Smith   int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
9917c922b88SBarry Smith   int         *row,*col,other_disassembled;
9927c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9933eda8832SBarry Smith   MatScalar   *val;
994bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
995bbb85fb3SSatish Balay 
996bbb85fb3SSatish Balay   PetscFunctionBegin;
997bbb85fb3SSatish Balay   if (!baij->donotstash) {
998a2d1c673SSatish Balay     while (1) {
9998798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1000a2d1c673SSatish Balay       if (!flg) break;
1001a2d1c673SSatish Balay 
1002bbb85fb3SSatish Balay       for (i=0; i<n;) {
1003bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1004bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1005bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
1006bbb85fb3SSatish Balay         else       ncols = n-i;
1007bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
100893fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1009bbb85fb3SSatish Balay         i = j;
1010bbb85fb3SSatish Balay       }
1011bbb85fb3SSatish Balay     }
10128798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1013a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1014a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1015a2d1c673SSatish Balay        restore the original flags */
1016a2d1c673SSatish Balay     r1 = baij->roworiented;
1017a2d1c673SSatish Balay     r2 = a->roworiented;
1018a2d1c673SSatish Balay     r3 = b->roworiented;
10197c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10207c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10217c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1022a2d1c673SSatish Balay     while (1) {
10238798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1024a2d1c673SSatish Balay       if (!flg) break;
1025a2d1c673SSatish Balay 
1026a2d1c673SSatish Balay       for (i=0; i<n;) {
1027a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1028a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1029a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1030a2d1c673SSatish Balay         else       ncols = n-i;
103193fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1032a2d1c673SSatish Balay         i = j;
1033a2d1c673SSatish Balay       }
1034a2d1c673SSatish Balay     }
10358798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1036a2d1c673SSatish Balay     baij->roworiented = r1;
1037a2d1c673SSatish Balay     a->roworiented    = r2;
1038a2d1c673SSatish Balay     b->roworiented    = r3;
1039bbb85fb3SSatish Balay   }
1040bbb85fb3SSatish Balay 
1041bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1042bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1043bbb85fb3SSatish Balay 
1044bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1045bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1046bbb85fb3SSatish Balay   /*
1047bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1048bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1049bbb85fb3SSatish Balay   */
1050bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1051bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1052bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1053bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1054bbb85fb3SSatish Balay     }
1055bbb85fb3SSatish Balay   }
1056bbb85fb3SSatish Balay 
1057bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1058bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1059bbb85fb3SSatish Balay   }
1060bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1061bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1062bbb85fb3SSatish Balay 
1063aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1064bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1065f6275e2eSBarry Smith     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1066bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1067bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1068bbb85fb3SSatish Balay   }
1069bbb85fb3SSatish Balay #endif
1070bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1071bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1072bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1073bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1074bbb85fb3SSatish Balay   }
1075bbb85fb3SSatish Balay 
1076606d414cSSatish Balay   if (baij->rowvalues) {
1077606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1078606d414cSSatish Balay     baij->rowvalues = 0;
1079606d414cSSatish Balay   }
1080bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1081bbb85fb3SSatish Balay }
108257b952d6SSatish Balay 
10834a2ae208SSatish Balay #undef __FUNCT__
10844a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
10856849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
108657b952d6SSatish Balay {
108757b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1088dfbe8321SBarry Smith   PetscErrorCode ierr;
1089dfbe8321SBarry Smith   int bs = baij->bs,size = baij->size,rank = baij->rank;
109032077d6dSBarry Smith   PetscTruth        iascii,isdraw;
1091b0a32e0cSBarry Smith   PetscViewer       sviewer;
1092f3ef73ceSBarry Smith   PetscViewerFormat format;
109357b952d6SSatish Balay 
1094d64ed03dSBarry Smith   PetscFunctionBegin;
1095f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
109632077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1097fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
109832077d6dSBarry Smith   if (iascii) {
1099b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1100456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
11014e220ebcSLois Curfman McInnes       MatInfo info;
1102d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1103d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1104b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1105273d9f13SBarry Smith               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
11066831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1107d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1108b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1109d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1110b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1111b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
111257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
11133a40ed3dSBarry Smith       PetscFunctionReturn(0);
1114fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1115b0a32e0cSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
11163a40ed3dSBarry Smith       PetscFunctionReturn(0);
111704929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
111804929863SHong Zhang       PetscFunctionReturn(0);
111957b952d6SSatish Balay     }
112057b952d6SSatish Balay   }
112157b952d6SSatish Balay 
11220f5bd95cSBarry Smith   if (isdraw) {
1123b0a32e0cSBarry Smith     PetscDraw       draw;
112457b952d6SSatish Balay     PetscTruth isnull;
1125b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1126b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
112757b952d6SSatish Balay   }
112857b952d6SSatish Balay 
112957b952d6SSatish Balay   if (size == 1) {
1130873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
113157b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1132d64ed03dSBarry Smith   } else {
113357b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
113457b952d6SSatish Balay     Mat         A;
113557b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1136273d9f13SBarry Smith     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11373eda8832SBarry Smith     MatScalar   *a;
113857b952d6SSatish Balay 
1139f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1140f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
114157b952d6SSatish Balay     if (!rank) {
1142f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1143d64ed03dSBarry Smith     } else {
1144f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
114557b952d6SSatish Balay     }
1146f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1147f204ca49SKris Buschelman     ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1148b0a32e0cSBarry Smith     PetscLogObjectParent(mat,A);
114957b952d6SSatish Balay 
115057b952d6SSatish Balay     /* copy over the A part */
115157b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
115257b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
115382502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
115457b952d6SSatish Balay 
115557b952d6SSatish Balay     for (i=0; i<mbs; i++) {
115657b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
115757b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115857b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
115957b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
116057b952d6SSatish Balay         for (k=0; k<bs; k++) {
116193fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1162cee3aa6bSSatish Balay           col++; a += bs;
116357b952d6SSatish Balay         }
116457b952d6SSatish Balay       }
116557b952d6SSatish Balay     }
116657b952d6SSatish Balay     /* copy over the B part */
116757b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
116857b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
116957b952d6SSatish Balay     for (i=0; i<mbs; i++) {
117057b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
117157b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
117257b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
117357b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
117457b952d6SSatish Balay         for (k=0; k<bs; k++) {
117593fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1176cee3aa6bSSatish Balay           col++; a += bs;
117757b952d6SSatish Balay         }
117857b952d6SSatish Balay       }
117957b952d6SSatish Balay     }
1180606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11816d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11826d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
118355843e3eSBarry Smith     /*
118455843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1185b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
118655843e3eSBarry Smith     */
1187b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1188f14a1c24SBarry Smith     if (!rank) {
1189e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
11906831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
119157b952d6SSatish Balay     }
1192b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
119357b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
119457b952d6SSatish Balay   }
11953a40ed3dSBarry Smith   PetscFunctionReturn(0);
119657b952d6SSatish Balay }
119757b952d6SSatish Balay 
11984a2ae208SSatish Balay #undef __FUNCT__
11994a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1200dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
120157b952d6SSatish Balay {
1202dfbe8321SBarry Smith   PetscErrorCode ierr;
120332077d6dSBarry Smith   PetscTruth iascii,isdraw,issocket,isbinary;
120457b952d6SSatish Balay 
1205d64ed03dSBarry Smith   PetscFunctionBegin;
120632077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1207fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1208b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1209fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
121032077d6dSBarry Smith   if (iascii || isdraw || issocket || isbinary) {
12117b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
12125cd90555SBarry Smith   } else {
121329bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
121457b952d6SSatish Balay   }
12153a40ed3dSBarry Smith   PetscFunctionReturn(0);
121657b952d6SSatish Balay }
121757b952d6SSatish Balay 
12184a2ae208SSatish Balay #undef __FUNCT__
12194a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1220dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
122179bdfe76SSatish Balay {
122279bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1223dfbe8321SBarry Smith   PetscErrorCode ierr;
122479bdfe76SSatish Balay 
1225d64ed03dSBarry Smith   PetscFunctionBegin;
1226aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1227b0a32e0cSBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
122879bdfe76SSatish Balay #endif
12298798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12308798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1231606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
123279bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
123379bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1234aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12350f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
123648e59246SSatish Balay #else
1237606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
123848e59246SSatish Balay #endif
1239606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1240606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1241606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1242606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1243606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1244606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12456fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12466fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12476fa18ffdSBarry Smith #endif
1248606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
12493a40ed3dSBarry Smith   PetscFunctionReturn(0);
125079bdfe76SSatish Balay }
125179bdfe76SSatish Balay 
12524a2ae208SSatish Balay #undef __FUNCT__
12534a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1254dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1255cee3aa6bSSatish Balay {
1256cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1257dfbe8321SBarry Smith   PetscErrorCode ierr;
1258dfbe8321SBarry Smith   int nt;
1259cee3aa6bSSatish Balay 
1260d64ed03dSBarry Smith   PetscFunctionBegin;
1261e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1262273d9f13SBarry Smith   if (nt != A->n) {
126329bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
126447b4a8eaSLois Curfman McInnes   }
1265e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1266273d9f13SBarry Smith   if (nt != A->m) {
126729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
126847b4a8eaSLois Curfman McInnes   }
126943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
127143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
127343a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12743a40ed3dSBarry Smith   PetscFunctionReturn(0);
1275cee3aa6bSSatish Balay }
1276cee3aa6bSSatish Balay 
12774a2ae208SSatish Balay #undef __FUNCT__
12784a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1279dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280cee3aa6bSSatish Balay {
1281cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1282dfbe8321SBarry Smith   PetscErrorCode ierr;
1283d64ed03dSBarry Smith 
1284d64ed03dSBarry Smith   PetscFunctionBegin;
128543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1286f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
128743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1288f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12893a40ed3dSBarry Smith   PetscFunctionReturn(0);
1290cee3aa6bSSatish Balay }
1291cee3aa6bSSatish Balay 
12924a2ae208SSatish Balay #undef __FUNCT__
12934a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1294dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1295cee3aa6bSSatish Balay {
1296cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1297dfbe8321SBarry Smith   PetscErrorCode ierr;
1298cee3aa6bSSatish Balay 
1299d64ed03dSBarry Smith   PetscFunctionBegin;
1300cee3aa6bSSatish Balay   /* do nondiagonal part */
13017c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1302cee3aa6bSSatish Balay   /* send it on its way */
1303537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1304cee3aa6bSSatish Balay   /* do local part */
13057c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1306cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1307cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1308cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1309639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13103a40ed3dSBarry Smith   PetscFunctionReturn(0);
1311cee3aa6bSSatish Balay }
1312cee3aa6bSSatish Balay 
13134a2ae208SSatish Balay #undef __FUNCT__
13144a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1315dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1316cee3aa6bSSatish Balay {
1317cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1318dfbe8321SBarry Smith   PetscErrorCode ierr;
1319cee3aa6bSSatish Balay 
1320d64ed03dSBarry Smith   PetscFunctionBegin;
1321cee3aa6bSSatish Balay   /* do nondiagonal part */
13227c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1323cee3aa6bSSatish Balay   /* send it on its way */
1324537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1325cee3aa6bSSatish Balay   /* do local part */
13267c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1327cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1328cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1329cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1330537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1332cee3aa6bSSatish Balay }
1333cee3aa6bSSatish Balay 
1334cee3aa6bSSatish Balay /*
1335cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1336cee3aa6bSSatish Balay    diagonal block
1337cee3aa6bSSatish Balay */
13384a2ae208SSatish Balay #undef __FUNCT__
13394a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1340dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1341cee3aa6bSSatish Balay {
1342cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1343dfbe8321SBarry Smith   PetscErrorCode ierr;
1344d64ed03dSBarry Smith 
1345d64ed03dSBarry Smith   PetscFunctionBegin;
1346273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13473a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1349cee3aa6bSSatish Balay }
1350cee3aa6bSSatish Balay 
13514a2ae208SSatish Balay #undef __FUNCT__
13524a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1353dfbe8321SBarry Smith PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1354cee3aa6bSSatish Balay {
1355cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1356dfbe8321SBarry Smith   PetscErrorCode ierr;
1357d64ed03dSBarry Smith 
1358d64ed03dSBarry Smith   PetscFunctionBegin;
1359cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1360cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13613a40ed3dSBarry Smith   PetscFunctionReturn(0);
1362cee3aa6bSSatish Balay }
1363026e39d0SSatish Balay 
13644a2ae208SSatish Balay #undef __FUNCT__
13654a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1366dfbe8321SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1367acdf5bf4SSatish Balay {
1368acdf5bf4SSatish Balay   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
136987828ca2SBarry Smith   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
13706849ba73SBarry Smith   PetscErrorCode ierr;
13716849ba73SBarry Smith   int          bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1372d9d09a02SSatish Balay   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1373d9d09a02SSatish Balay   int          *cmap,*idx_p,cstart = mat->cstart;
1374acdf5bf4SSatish Balay 
1375d64ed03dSBarry Smith   PetscFunctionBegin;
137629bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1377acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1378acdf5bf4SSatish Balay 
1379acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1380acdf5bf4SSatish Balay     /*
1381acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1382acdf5bf4SSatish Balay     */
1383acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1384bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1385bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1386acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1387acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1388acdf5bf4SSatish Balay     }
138987828ca2SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1390acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1391acdf5bf4SSatish Balay   }
1392acdf5bf4SSatish Balay 
139329bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1394d9d09a02SSatish Balay   lrow = row - brstart;
1395acdf5bf4SSatish Balay 
1396acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1397acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1398acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1399f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1400f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1401acdf5bf4SSatish Balay   nztot = nzA + nzB;
1402acdf5bf4SSatish Balay 
1403acdf5bf4SSatish Balay   cmap  = mat->garray;
1404acdf5bf4SSatish Balay   if (v  || idx) {
1405acdf5bf4SSatish Balay     if (nztot) {
1406acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1407acdf5bf4SSatish Balay       int imark = -1;
1408acdf5bf4SSatish Balay       if (v) {
1409acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1410acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1411d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1412acdf5bf4SSatish Balay           else break;
1413acdf5bf4SSatish Balay         }
1414acdf5bf4SSatish Balay         imark = i;
1415acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1416acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1417acdf5bf4SSatish Balay       }
1418acdf5bf4SSatish Balay       if (idx) {
1419acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1420acdf5bf4SSatish Balay         if (imark > -1) {
1421acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1422bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1423acdf5bf4SSatish Balay           }
1424acdf5bf4SSatish Balay         } else {
1425acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1426d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1427d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1428acdf5bf4SSatish Balay             else break;
1429acdf5bf4SSatish Balay           }
1430acdf5bf4SSatish Balay           imark = i;
1431acdf5bf4SSatish Balay         }
1432d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1433d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1434acdf5bf4SSatish Balay       }
1435d64ed03dSBarry Smith     } else {
1436d212a18eSSatish Balay       if (idx) *idx = 0;
1437d212a18eSSatish Balay       if (v)   *v   = 0;
1438d212a18eSSatish Balay     }
1439acdf5bf4SSatish Balay   }
1440acdf5bf4SSatish Balay   *nz = nztot;
1441f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1442f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1444acdf5bf4SSatish Balay }
1445acdf5bf4SSatish Balay 
14464a2ae208SSatish Balay #undef __FUNCT__
14474a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1448dfbe8321SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1449acdf5bf4SSatish Balay {
1450acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1451d64ed03dSBarry Smith 
1452d64ed03dSBarry Smith   PetscFunctionBegin;
1453acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
145429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1455acdf5bf4SSatish Balay   }
1456acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1458acdf5bf4SSatish Balay }
1459acdf5bf4SSatish Balay 
14604a2ae208SSatish Balay #undef __FUNCT__
14614a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1462dfbe8321SBarry Smith PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14635a838052SSatish Balay {
14645a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1465d64ed03dSBarry Smith 
1466d64ed03dSBarry Smith   PetscFunctionBegin;
14675a838052SSatish Balay   *bs = baij->bs;
14683a40ed3dSBarry Smith   PetscFunctionReturn(0);
14695a838052SSatish Balay }
14705a838052SSatish Balay 
14714a2ae208SSatish Balay #undef __FUNCT__
14724a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
147458667388SSatish Balay {
147558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1476dfbe8321SBarry Smith   PetscErrorCode ierr;
1477d64ed03dSBarry Smith 
1478d64ed03dSBarry Smith   PetscFunctionBegin;
147958667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
148058667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14813a40ed3dSBarry Smith   PetscFunctionReturn(0);
148258667388SSatish Balay }
14830ac07820SSatish Balay 
14844a2ae208SSatish Balay #undef __FUNCT__
14854a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14870ac07820SSatish Balay {
14884e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14894e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
1490dfbe8321SBarry Smith   PetscErrorCode ierr;
1491329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14920ac07820SSatish Balay 
1493d64ed03dSBarry Smith   PetscFunctionBegin;
1494f6275e2eSBarry Smith   info->block_size     = (PetscReal)a->bs;
14954e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14960e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14984e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14990e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15010ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15024e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15034e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15044e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15054e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15064e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15070ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1508d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
15094e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15104e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15114e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15124e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15134e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15140ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1515d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
15164e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15174e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15184e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15194e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15204e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1521d41123aaSBarry Smith   } else {
152229bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15230ac07820SSatish Balay   }
1524f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1525f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1526f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1527f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
15284e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15294e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15304e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15313a40ed3dSBarry Smith   PetscFunctionReturn(0);
15320ac07820SSatish Balay }
15330ac07820SSatish Balay 
15344a2ae208SSatish Balay #undef __FUNCT__
15354a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1536dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
153758667388SSatish Balay {
153858667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1539dfbe8321SBarry Smith   PetscErrorCode ierr;
154058667388SSatish Balay 
1541d64ed03dSBarry Smith   PetscFunctionBegin;
154212c028f9SKris Buschelman   switch (op) {
154312c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
154412c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
154512c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
154612c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
154712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
154812c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
154912c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
155098305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155198305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
155212c028f9SKris Buschelman     break;
155312c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15547c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
155598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
155712c028f9SKris Buschelman     break;
155812c028f9SKris Buschelman   case MAT_ROWS_SORTED:
155912c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
156012c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
1561b0a32e0cSBarry Smith     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
156212c028f9SKris Buschelman     break;
156312c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
15647c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
156598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
156698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
156712c028f9SKris Buschelman     break;
156812c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15697c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
157012c028f9SKris Buschelman     break;
157112c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
157229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
157312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15747c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
157512c028f9SKris Buschelman     break;
157677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
157777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15782188ac68SBarry Smith   case MAT_HERMITIAN:
15792188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
15802188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
15812188ac68SBarry Smith     break;
15829a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
15839a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
15849a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
15859a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
158677e54ba9SKris Buschelman     break;
158712c028f9SKris Buschelman   default:
158829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1589d64ed03dSBarry Smith   }
15903a40ed3dSBarry Smith   PetscFunctionReturn(0);
159158667388SSatish Balay }
159258667388SSatish Balay 
15934a2ae208SSatish Balay #undef __FUNCT__
15944a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1595dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15960ac07820SSatish Balay {
15970ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15980ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15990ac07820SSatish Balay   Mat         B;
1600dfbe8321SBarry Smith   PetscErrorCode ierr;
1601dfbe8321SBarry Smith   int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
16020ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
16033eda8832SBarry Smith   MatScalar   *a;
16040ac07820SSatish Balay 
1605d64ed03dSBarry Smith   PetscFunctionBegin;
160629bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1608f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1609f204ca49SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
16100ac07820SSatish Balay 
16110ac07820SSatish Balay   /* copy over the A part */
16120ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16130ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
161482502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
16150ac07820SSatish Balay 
16160ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16170ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16180ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16190ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16200ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16210ac07820SSatish Balay       for (k=0; k<bs; k++) {
162293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16230ac07820SSatish Balay         col++; a += bs;
16240ac07820SSatish Balay       }
16250ac07820SSatish Balay     }
16260ac07820SSatish Balay   }
16270ac07820SSatish Balay   /* copy over the B part */
16280ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16290ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16300ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16310ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16320ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16330ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16340ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16350ac07820SSatish Balay       for (k=0; k<bs; k++) {
163693fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16370ac07820SSatish Balay         col++; a += bs;
16380ac07820SSatish Balay       }
16390ac07820SSatish Balay     }
16400ac07820SSatish Balay   }
1641606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16420ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16430ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16440ac07820SSatish Balay 
16457c922b88SBarry Smith   if (matout) {
16460ac07820SSatish Balay     *matout = B;
16470ac07820SSatish Balay   } else {
1648273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16490ac07820SSatish Balay   }
16503a40ed3dSBarry Smith   PetscFunctionReturn(0);
16510ac07820SSatish Balay }
16520e95ebc0SSatish Balay 
16534a2ae208SSatish Balay #undef __FUNCT__
16544a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1655dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16560e95ebc0SSatish Balay {
165736c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
165836c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
1659dfbe8321SBarry Smith   PetscErrorCode ierr;
1660dfbe8321SBarry Smith   int s1,s2,s3;
16610e95ebc0SSatish Balay 
1662d64ed03dSBarry Smith   PetscFunctionBegin;
166336c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
166436c4a09eSSatish Balay   if (rr) {
166536c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
166629bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
166736c4a09eSSatish Balay     /* Overlap communication with computation. */
166836c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
166936c4a09eSSatish Balay   }
16700e95ebc0SSatish Balay   if (ll) {
16710e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
167229bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1673a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16740e95ebc0SSatish Balay   }
167536c4a09eSSatish Balay   /* scale  the diagonal block */
167636c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
167736c4a09eSSatish Balay 
167836c4a09eSSatish Balay   if (rr) {
167936c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
168036c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1681a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
168236c4a09eSSatish Balay   }
168336c4a09eSSatish Balay 
16843a40ed3dSBarry Smith   PetscFunctionReturn(0);
16850e95ebc0SSatish Balay }
16860e95ebc0SSatish Balay 
16874a2ae208SSatish Balay #undef __FUNCT__
16884a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1689dfbe8321SBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
16900ac07820SSatish Balay {
16910ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16926849ba73SBarry Smith   PetscErrorCode ierr;
16936849ba73SBarry Smith   int            i,N,*rows,*owners = l->rowners,size = l->size;
1694c1dc657dSBarry Smith   int            *nprocs,j,idx,nsends,row;
16950ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16960ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1697a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16980ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16990ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17000ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17010ac07820SSatish Balay   IS             istmp;
170235d8aa7fSBarry Smith   PetscTruth     found;
17030ac07820SSatish Balay 
1704d64ed03dSBarry Smith   PetscFunctionBegin;
1705f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
17060ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
17070ac07820SSatish Balay 
17080ac07820SSatish Balay   /*  first count number of contributors to each processor */
170982502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1710549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1711b0a32e0cSBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
17120ac07820SSatish Balay   for (i=0; i<N; i++) {
17130ac07820SSatish Balay     idx   = rows[i];
171435d8aa7fSBarry Smith     found = PETSC_FALSE;
17150ac07820SSatish Balay     for (j=0; j<size; j++) {
17160ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1717c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
17180ac07820SSatish Balay       }
17190ac07820SSatish Balay     }
172029bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17210ac07820SSatish Balay   }
1722c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
17230ac07820SSatish Balay 
17240ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1725c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
17260ac07820SSatish Balay 
17270ac07820SSatish Balay   /* post receives:   */
1728b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1729b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
17300ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1731ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17320ac07820SSatish Balay   }
17330ac07820SSatish Balay 
17340ac07820SSatish Balay   /* do sends:
17350ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17360ac07820SSatish Balay      the ith processor
17370ac07820SSatish Balay   */
1738b0a32e0cSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1739b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1740b0a32e0cSBarry Smith   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
17410ac07820SSatish Balay   starts[0]  = 0;
1742c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17430ac07820SSatish Balay   for (i=0; i<N; i++) {
17440ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17450ac07820SSatish Balay   }
17466831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17470ac07820SSatish Balay 
17480ac07820SSatish Balay   starts[0] = 0;
1749c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17500ac07820SSatish Balay   count = 0;
17510ac07820SSatish Balay   for (i=0; i<size; i++) {
1752c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1753c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17540ac07820SSatish Balay     }
17550ac07820SSatish Balay   }
1756606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17570ac07820SSatish Balay 
17580ac07820SSatish Balay   base = owners[rank]*bs;
17590ac07820SSatish Balay 
17600ac07820SSatish Balay   /*  wait on receives */
1761b0a32e0cSBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
17620ac07820SSatish Balay   source = lens + nrecvs;
17630ac07820SSatish Balay   count  = nrecvs; slen = 0;
17640ac07820SSatish Balay   while (count) {
1765ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17660ac07820SSatish Balay     /* unpack receives into our local space */
1767ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17680ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17690ac07820SSatish Balay     lens[imdex]    = n;
17700ac07820SSatish Balay     slen          += n;
17710ac07820SSatish Balay     count--;
17720ac07820SSatish Balay   }
1773606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17740ac07820SSatish Balay 
17750ac07820SSatish Balay   /* move the data into the send scatter */
1776b0a32e0cSBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
17770ac07820SSatish Balay   count = 0;
17780ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17790ac07820SSatish Balay     values = rvalues + i*nmax;
17800ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17810ac07820SSatish Balay       lrows[count++] = values[j] - base;
17820ac07820SSatish Balay     }
17830ac07820SSatish Balay   }
1784606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1785606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1786606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1787606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17880ac07820SSatish Balay 
17890ac07820SSatish Balay   /* actually zap the local rows */
1790029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1791b0a32e0cSBarry Smith   PetscLogObjectParent(A,istmp);
1792a07cd24cSSatish Balay 
179372dacd9aSBarry Smith   /*
179472dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
179572dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
179672dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
179772dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
179872dacd9aSBarry Smith 
179972dacd9aSBarry Smith        Contributed by: Mathew Knepley
180072dacd9aSBarry Smith   */
18019c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
18026fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
18039c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
18046fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
18059c957beeSSatish Balay   } else if (diag) {
18066fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1807fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
180829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1809fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18106525c446SSatish Balay     }
1811a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1812a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
18133f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1814a07cd24cSSatish Balay     }
1815a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1816a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18179c957beeSSatish Balay   } else {
18186fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1819a07cd24cSSatish Balay   }
18209c957beeSSatish Balay 
18219c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1822606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1823a07cd24cSSatish Balay 
18240ac07820SSatish Balay   /* wait on sends */
18250ac07820SSatish Balay   if (nsends) {
182682502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1827ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1828606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
18290ac07820SSatish Balay   }
1830606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1831606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18320ac07820SSatish Balay 
18333a40ed3dSBarry Smith   PetscFunctionReturn(0);
18340ac07820SSatish Balay }
183572dacd9aSBarry Smith 
18364a2ae208SSatish Balay #undef __FUNCT__
18374a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1838dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1839ba4ca20aSSatish Balay {
1840ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
184125fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1842133cdb44SSatish Balay   static int  called = 0;
1843dfbe8321SBarry Smith   PetscErrorCode ierr;
1844ba4ca20aSSatish Balay 
1845d64ed03dSBarry Smith   PetscFunctionBegin;
18463a40ed3dSBarry Smith   if (!a->rank) {
18473a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
184825fdafccSSatish Balay   }
184925fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1850d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1851d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1853ba4ca20aSSatish Balay }
18540ac07820SSatish Balay 
18554a2ae208SSatish Balay #undef __FUNCT__
18564a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1857dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1858bb5a7306SBarry Smith {
1859bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1860dfbe8321SBarry Smith   PetscErrorCode ierr;
1861d64ed03dSBarry Smith 
1862d64ed03dSBarry Smith   PetscFunctionBegin;
1863bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1865bb5a7306SBarry Smith }
1866bb5a7306SBarry Smith 
18676849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18680ac07820SSatish Balay 
18694a2ae208SSatish Balay #undef __FUNCT__
18704a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1871dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18727fc3c18eSBarry Smith {
18737fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18747fc3c18eSBarry Smith   Mat         a,b,c,d;
18757fc3c18eSBarry Smith   PetscTruth  flg;
1876dfbe8321SBarry Smith   PetscErrorCode ierr;
18777fc3c18eSBarry Smith 
18787fc3c18eSBarry Smith   PetscFunctionBegin;
18797fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18807fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18817fc3c18eSBarry Smith 
18827fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18837fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18847fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18857fc3c18eSBarry Smith   }
18867fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18877fc3c18eSBarry Smith   PetscFunctionReturn(0);
18887fc3c18eSBarry Smith }
18897fc3c18eSBarry Smith 
1890273d9f13SBarry Smith 
18914a2ae208SSatish Balay #undef __FUNCT__
18924a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1893dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1894273d9f13SBarry Smith {
1895dfbe8321SBarry Smith   PetscErrorCode ierr;
1896273d9f13SBarry Smith 
1897273d9f13SBarry Smith   PetscFunctionBegin;
1898273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1899273d9f13SBarry Smith   PetscFunctionReturn(0);
1900273d9f13SBarry Smith }
1901273d9f13SBarry Smith 
190279bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1903cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1904cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1905cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1906cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1907cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
190897304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
19097c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
19107c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1911cc2dc46cSBarry Smith        0,
1912cc2dc46cSBarry Smith        0,
1913cc2dc46cSBarry Smith        0,
191497304618SKris Buschelman /*10*/ 0,
1915cc2dc46cSBarry Smith        0,
1916cc2dc46cSBarry Smith        0,
1917cc2dc46cSBarry Smith        0,
1918cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
191997304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
19207fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1921cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1922cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1923cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
192497304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
1925cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
1926cc2dc46cSBarry Smith        0,
1927cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
1928cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
192997304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
1930cc2dc46cSBarry Smith        0,
1931cc2dc46cSBarry Smith        0,
1932cc2dc46cSBarry Smith        0,
1933cc2dc46cSBarry Smith        0,
193497304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
1935273d9f13SBarry Smith        0,
1936cc2dc46cSBarry Smith        0,
1937cc2dc46cSBarry Smith        0,
1938cc2dc46cSBarry Smith        0,
193997304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
1940cc2dc46cSBarry Smith        0,
1941cc2dc46cSBarry Smith        0,
1942cc2dc46cSBarry Smith        0,
1943cc2dc46cSBarry Smith        0,
194497304618SKris Buschelman /*40*/ 0,
1945cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
1946cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
1947cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
1948cc2dc46cSBarry Smith        0,
194997304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
1950cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
1951cc2dc46cSBarry Smith        0,
1952cc2dc46cSBarry Smith        0,
1953cc2dc46cSBarry Smith        0,
195497304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ,
1955cc2dc46cSBarry Smith        0,
1956cc2dc46cSBarry Smith        0,
1957cc2dc46cSBarry Smith        0,
1958cc2dc46cSBarry Smith        0,
195997304618SKris Buschelman /*55*/ 0,
1960cc2dc46cSBarry Smith        0,
1961cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
1962cc2dc46cSBarry Smith        0,
1963cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
196497304618SKris Buschelman /*60*/ 0,
1965f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
1966f14a1c24SBarry Smith        MatView_MPIBAIJ,
19678a124369SBarry Smith        MatGetPetscMaps_Petsc,
19687843d17aSBarry Smith        0,
196997304618SKris Buschelman /*65*/ 0,
19707843d17aSBarry Smith        0,
19717843d17aSBarry Smith        0,
19727843d17aSBarry Smith        0,
19737843d17aSBarry Smith        0,
197497304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
19757843d17aSBarry Smith        0,
197697304618SKris Buschelman        0,
197797304618SKris Buschelman        0,
197897304618SKris Buschelman        0,
197997304618SKris Buschelman /*75*/ 0,
198097304618SKris Buschelman        0,
198197304618SKris Buschelman        0,
198297304618SKris Buschelman        0,
198397304618SKris Buschelman        0,
198497304618SKris Buschelman /*80*/ 0,
198597304618SKris Buschelman        0,
198697304618SKris Buschelman        0,
198797304618SKris Buschelman        0,
1988*865e5f61SKris Buschelman        MatLoad_MPIBAIJ,
1989*865e5f61SKris Buschelman /*85*/ 0,
1990*865e5f61SKris Buschelman        0,
1991*865e5f61SKris Buschelman        0,
1992*865e5f61SKris Buschelman        0,
1993*865e5f61SKris Buschelman        0,
1994*865e5f61SKris Buschelman /*90*/ 0,
1995*865e5f61SKris Buschelman        0,
1996*865e5f61SKris Buschelman        0,
1997*865e5f61SKris Buschelman        0,
1998*865e5f61SKris Buschelman        0,
1999*865e5f61SKris Buschelman /*95*/ 0,
2000*865e5f61SKris Buschelman        0,
2001*865e5f61SKris Buschelman        0,
2002*865e5f61SKris Buschelman        0};
200379bdfe76SSatish Balay 
20045ef9f2a5SBarry Smith 
2005e18c124aSSatish Balay EXTERN_C_BEGIN
20064a2ae208SSatish Balay #undef __FUNCT__
20074a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2008dfbe8321SBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20095ef9f2a5SBarry Smith {
20105ef9f2a5SBarry Smith   PetscFunctionBegin;
20115ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20125ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
20135ef9f2a5SBarry Smith   PetscFunctionReturn(0);
20145ef9f2a5SBarry Smith }
2015e18c124aSSatish Balay EXTERN_C_END
201679bdfe76SSatish Balay 
2017273d9f13SBarry Smith EXTERN_C_BEGIN
2018d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2019d94109b8SHong Zhang EXTERN_C_END
2020d94109b8SHong Zhang 
2021d94109b8SHong Zhang EXTERN_C_BEGIN
20224a2ae208SSatish Balay #undef __FUNCT__
2023a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2024dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2025a23d5eceSKris Buschelman {
2026a23d5eceSKris Buschelman   Mat_MPIBAIJ  *b;
2027dfbe8321SBarry Smith   PetscErrorCode ierr;
2028dfbe8321SBarry Smith   int  i;
2029a23d5eceSKris Buschelman 
2030a23d5eceSKris Buschelman   PetscFunctionBegin;
2031a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2032a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2033a23d5eceSKris Buschelman 
2034a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2035a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2036a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2037a23d5eceSKris Buschelman   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2038a23d5eceSKris Buschelman   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2039a23d5eceSKris Buschelman   if (d_nnz) {
2040a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
2041a23d5eceSKris Buschelman       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2042a23d5eceSKris Buschelman     }
2043a23d5eceSKris Buschelman   }
2044a23d5eceSKris Buschelman   if (o_nnz) {
2045a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
2046a23d5eceSKris Buschelman       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2047a23d5eceSKris Buschelman     }
2048a23d5eceSKris Buschelman   }
2049a23d5eceSKris Buschelman 
2050a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2051a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2052a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2053a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2054a23d5eceSKris Buschelman 
2055a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2056a23d5eceSKris Buschelman   b->bs  = bs;
2057a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2058a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2059a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2060a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2061a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2062a23d5eceSKris Buschelman 
2063a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2064a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2065a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2066a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2067a23d5eceSKris Buschelman   }
2068a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2069a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2070a23d5eceSKris Buschelman 
2071a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2072a23d5eceSKris Buschelman   b->cowners[0] = 0;
2073a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2074a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2075a23d5eceSKris Buschelman   }
2076a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2077a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2078a23d5eceSKris Buschelman 
2079a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2080a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2081a23d5eceSKris Buschelman   }
2082a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2083a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2084a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2085a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2086a23d5eceSKris Buschelman 
20879c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
20889c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2089c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
20909c097c71SKris Buschelman   PetscLogObjectParent(B,b->A);
20919c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
20929c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2093c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
20949c097c71SKris Buschelman   PetscLogObjectParent(B,b->B);
2095c60e587dSKris Buschelman 
2096a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2097a23d5eceSKris Buschelman 
2098a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2099a23d5eceSKris Buschelman }
2100a23d5eceSKris Buschelman EXTERN_C_END
2101a23d5eceSKris Buschelman 
2102a23d5eceSKris Buschelman EXTERN_C_BEGIN
2103dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2104dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
210592b32695SKris Buschelman EXTERN_C_END
21065bf65638SKris Buschelman 
21070bad9183SKris Buschelman /*MC
2108fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
21090bad9183SKris Buschelman 
21100bad9183SKris Buschelman    Options Database Keys:
21110bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
21120bad9183SKris Buschelman 
21130bad9183SKris Buschelman   Level: beginner
21140bad9183SKris Buschelman 
21150bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
21160bad9183SKris Buschelman M*/
21170bad9183SKris Buschelman 
211892b32695SKris Buschelman EXTERN_C_BEGIN
2119a23d5eceSKris Buschelman #undef __FUNCT__
21204a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2121dfbe8321SBarry Smith PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2122273d9f13SBarry Smith {
2123273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
2124dfbe8321SBarry Smith   PetscErrorCode ierr;
2125273d9f13SBarry Smith   PetscTruth   flg;
2126273d9f13SBarry Smith 
2127273d9f13SBarry Smith   PetscFunctionBegin;
212882502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
212982502324SSatish Balay   B->data = (void*)b;
213082502324SSatish Balay 
2131273d9f13SBarry Smith   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2132273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2133273d9f13SBarry Smith   B->mapping    = 0;
2134273d9f13SBarry Smith   B->factor     = 0;
2135273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2136273d9f13SBarry Smith 
2137273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2138273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2139273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2140273d9f13SBarry Smith 
2141273d9f13SBarry Smith   /* build local table of row and column ownerships */
214282502324SSatish Balay   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2143b0a32e0cSBarry Smith   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2144273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2145273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2146273d9f13SBarry Smith 
2147273d9f13SBarry Smith   /* build cache for off array entries formed */
2148273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2149273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2150273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2151273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2152273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2153273d9f13SBarry Smith 
2154cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2155273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
215664a35ccbSBarry Smith   b->setvalueslen     = 0;
2157273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2158273d9f13SBarry Smith #endif
2159273d9f13SBarry Smith 
2160273d9f13SBarry Smith   /* stuff used in block assembly */
2161273d9f13SBarry Smith   b->barray       = 0;
2162273d9f13SBarry Smith 
2163273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2164273d9f13SBarry Smith   b->lvec         = 0;
2165273d9f13SBarry Smith   b->Mvctx        = 0;
2166273d9f13SBarry Smith 
2167273d9f13SBarry Smith   /* stuff for MatGetRow() */
2168273d9f13SBarry Smith   b->rowindices   = 0;
2169273d9f13SBarry Smith   b->rowvalues    = 0;
2170273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2171273d9f13SBarry Smith 
2172273d9f13SBarry Smith   /* hash table stuff */
2173273d9f13SBarry Smith   b->ht           = 0;
2174273d9f13SBarry Smith   b->hd           = 0;
2175273d9f13SBarry Smith   b->ht_size      = 0;
2176273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2177273d9f13SBarry Smith   b->ht_fact      = 0;
2178273d9f13SBarry Smith   b->ht_total_ct  = 0;
2179273d9f13SBarry Smith   b->ht_insert_ct = 0;
2180273d9f13SBarry Smith 
2181b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2182273d9f13SBarry Smith   if (flg) {
2183f6275e2eSBarry Smith     PetscReal fact = 1.39;
2184273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
218587828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2186273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2187273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2188b0a32e0cSBarry Smith     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2189273d9f13SBarry Smith   }
2190273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2191273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2192273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2193273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2194273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2195273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2196273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2197273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2198273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2199a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2200a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2201a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
220292b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
220392b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
220492b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
22055bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
22065bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
22075bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2208273d9f13SBarry Smith   PetscFunctionReturn(0);
2209273d9f13SBarry Smith }
2210273d9f13SBarry Smith EXTERN_C_END
2211273d9f13SBarry Smith 
2212209238afSKris Buschelman /*MC
2213002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2214209238afSKris Buschelman 
2215209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2216209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2217209238afSKris Buschelman 
2218209238afSKris Buschelman    Options Database Keys:
2219209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2220209238afSKris Buschelman 
2221209238afSKris Buschelman   Level: beginner
2222209238afSKris Buschelman 
2223209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2224209238afSKris Buschelman M*/
2225209238afSKris Buschelman 
2226209238afSKris Buschelman EXTERN_C_BEGIN
2227209238afSKris Buschelman #undef __FUNCT__
2228209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2229dfbe8321SBarry Smith PetscErrorCode MatCreate_BAIJ(Mat A)
2230dfbe8321SBarry Smith {
22316849ba73SBarry Smith   PetscErrorCode ierr;
22326849ba73SBarry Smith   int size;
2233209238afSKris Buschelman 
2234209238afSKris Buschelman   PetscFunctionBegin;
2235209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2236209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2237209238afSKris Buschelman   if (size == 1) {
2238209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2239209238afSKris Buschelman   } else {
2240209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2241209238afSKris Buschelman   }
2242209238afSKris Buschelman   PetscFunctionReturn(0);
2243209238afSKris Buschelman }
2244209238afSKris Buschelman EXTERN_C_END
2245209238afSKris Buschelman 
22464a2ae208SSatish Balay #undef __FUNCT__
22474a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2248273d9f13SBarry Smith /*@C
2249273d9f13SBarry Smith    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2250273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2251273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2252273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2253273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2254273d9f13SBarry Smith 
2255273d9f13SBarry Smith    Collective on Mat
2256273d9f13SBarry Smith 
2257273d9f13SBarry Smith    Input Parameters:
2258273d9f13SBarry Smith +  A - the matrix
2259273d9f13SBarry Smith .  bs   - size of blockk
2260273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2261273d9f13SBarry Smith            submatrix  (same for all local rows)
2262273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2263273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2264273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2265273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2266273d9f13SBarry Smith            submatrix (same for all local rows).
2267273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2268273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2269273d9f13SBarry Smith            each block row) or PETSC_NULL.
2270273d9f13SBarry Smith 
2271273d9f13SBarry Smith    Output Parameter:
2272273d9f13SBarry Smith 
2273273d9f13SBarry Smith 
2274273d9f13SBarry Smith    Options Database Keys:
2275273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2276273d9f13SBarry Smith                      block calculations (much slower)
2277273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2278273d9f13SBarry Smith 
2279273d9f13SBarry Smith    Notes:
2280273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2281273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2282273d9f13SBarry Smith 
2283273d9f13SBarry Smith    Storage Information:
2284273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2285273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2286273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2287273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2288273d9f13SBarry Smith 
2289273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2290273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2291273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2292273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2293273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2294273d9f13SBarry Smith 
2295273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2296273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2297273d9f13SBarry Smith 
2298273d9f13SBarry Smith .vb
2299273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2300273d9f13SBarry Smith           -------------------
2301273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2302273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2303273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2304273d9f13SBarry Smith           -------------------
2305273d9f13SBarry Smith .ve
2306273d9f13SBarry Smith 
2307273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2308273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2309273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2310273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2311273d9f13SBarry Smith 
2312273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2313273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2314273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2315273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2316273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2317273d9f13SBarry Smith    matrices.
2318273d9f13SBarry Smith 
2319273d9f13SBarry Smith    Level: intermediate
2320273d9f13SBarry Smith 
2321273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2322273d9f13SBarry Smith 
2323273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2324273d9f13SBarry Smith @*/
2325dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2326273d9f13SBarry Smith {
2327dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2328273d9f13SBarry Smith 
2329273d9f13SBarry Smith   PetscFunctionBegin;
2330a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2331a23d5eceSKris Buschelman   if (f) {
2332a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2333273d9f13SBarry Smith   }
2334273d9f13SBarry Smith   PetscFunctionReturn(0);
2335273d9f13SBarry Smith }
2336273d9f13SBarry Smith 
23374a2ae208SSatish Balay #undef __FUNCT__
23384a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
233979bdfe76SSatish Balay /*@C
234079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
234179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
234279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
234379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
234479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
234579bdfe76SSatish Balay 
2346db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2347db81eaa0SLois Curfman McInnes 
234879bdfe76SSatish Balay    Input Parameters:
2349db81eaa0SLois Curfman McInnes +  comm - MPI communicator
235079bdfe76SSatish Balay .  bs   - size of blockk
235179bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
235292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
235392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
235492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
235592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
235692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2357be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2358be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
235947a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
236079bdfe76SSatish Balay            submatrix  (same for all local rows)
236147a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
236292e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2363db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
236447a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
236579bdfe76SSatish Balay            submatrix (same for all local rows).
236647a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
236792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
236892e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
236979bdfe76SSatish Balay 
237079bdfe76SSatish Balay    Output Parameter:
237179bdfe76SSatish Balay .  A - the matrix
237279bdfe76SSatish Balay 
2373db81eaa0SLois Curfman McInnes    Options Database Keys:
2374db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2375db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2376db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
23773ffaccefSLois Curfman McInnes 
2378b259b22eSLois Curfman McInnes    Notes:
237947a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
238047a75d0bSBarry Smith 
238179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
238279bdfe76SSatish Balay    (possibly both).
238379bdfe76SSatish Balay 
2384be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2385be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2386be79a94dSBarry Smith 
238779bdfe76SSatish Balay    Storage Information:
238879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
238979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
239079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
239179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
239279bdfe76SSatish Balay 
239379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
239479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
239579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
239679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
239779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
239879bdfe76SSatish Balay 
239979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
240079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
240179bdfe76SSatish Balay 
2402db81eaa0SLois Curfman McInnes .vb
2403db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2404db81eaa0SLois Curfman McInnes           -------------------
2405db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2406db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2407db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2408db81eaa0SLois Curfman McInnes           -------------------
2409db81eaa0SLois Curfman McInnes .ve
241079bdfe76SSatish Balay 
241179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
241279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
241379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
241457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
241579bdfe76SSatish Balay 
2416d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2417d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
241879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
241992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
242092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
24216da5968aSLois Curfman McInnes    matrices.
242279bdfe76SSatish Balay 
2423027ccd11SLois Curfman McInnes    Level: intermediate
2424027ccd11SLois Curfman McInnes 
242592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
242679bdfe76SSatish Balay 
24273eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
242879bdfe76SSatish Balay @*/
2429dfbe8321SBarry Smith PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
243079bdfe76SSatish Balay {
24316849ba73SBarry Smith   PetscErrorCode ierr;
24326849ba73SBarry Smith   int size;
243379bdfe76SSatish Balay 
2434d64ed03dSBarry Smith   PetscFunctionBegin;
2435273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2436d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437273d9f13SBarry Smith   if (size > 1) {
2438273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2439273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2440273d9f13SBarry Smith   } else {
2441273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2442273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
24433914022bSBarry Smith   }
24443a40ed3dSBarry Smith   PetscFunctionReturn(0);
244579bdfe76SSatish Balay }
2446026e39d0SSatish Balay 
24474a2ae208SSatish Balay #undef __FUNCT__
24484a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
24496849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
24500ac07820SSatish Balay {
24510ac07820SSatish Balay   Mat         mat;
24520ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2453dfbe8321SBarry Smith   PetscErrorCode ierr;
2454dfbe8321SBarry Smith   int len=0;
24550ac07820SSatish Balay 
2456d64ed03dSBarry Smith   PetscFunctionBegin;
24570ac07820SSatish Balay   *newmat       = 0;
2458273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2459be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
24601d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
24617fff6886SHong Zhang 
24624beb1cfeSHong Zhang   mat->factor       = matin->factor;
2463273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24640ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
24657fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24667fff6886SHong Zhang 
2467273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
24680ac07820SSatish Balay   a->bs  = oldmat->bs;
24690ac07820SSatish Balay   a->bs2 = oldmat->bs2;
24700ac07820SSatish Balay   a->mbs = oldmat->mbs;
24710ac07820SSatish Balay   a->nbs = oldmat->nbs;
24720ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
24730ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
24740ac07820SSatish Balay 
24750ac07820SSatish Balay   a->rstart       = oldmat->rstart;
24760ac07820SSatish Balay   a->rend         = oldmat->rend;
24770ac07820SSatish Balay   a->cstart       = oldmat->cstart;
24780ac07820SSatish Balay   a->cend         = oldmat->cend;
24790ac07820SSatish Balay   a->size         = oldmat->size;
24800ac07820SSatish Balay   a->rank         = oldmat->rank;
2481aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2482aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2483aef5e8e0SSatish Balay   a->rowindices   = 0;
24840ac07820SSatish Balay   a->rowvalues    = 0;
24850ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
248630793edcSSatish Balay   a->barray       = 0;
24873123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
24883123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
24893123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
24903123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
24910ac07820SSatish Balay 
2492133cdb44SSatish Balay   /* hash table stuff */
2493133cdb44SSatish Balay   a->ht           = 0;
2494133cdb44SSatish Balay   a->hd           = 0;
2495133cdb44SSatish Balay   a->ht_size      = 0;
2496133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
249725fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2498133cdb44SSatish Balay   a->ht_total_ct  = 0;
2499133cdb44SSatish Balay   a->ht_insert_ct = 0;
2500133cdb44SSatish Balay 
2501549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
25028798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
25038798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
25040ac07820SSatish Balay   if (oldmat->colmap) {
2505aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
25060f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
250748e59246SSatish Balay #else
250882502324SSatish Balay   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2509b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2510549d3d68SSatish Balay   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
251148e59246SSatish Balay #endif
25120ac07820SSatish Balay   } else a->colmap = 0;
25134beb1cfeSHong Zhang 
25140ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
251582502324SSatish Balay     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2516b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,len*sizeof(int));
2517549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
25180ac07820SSatish Balay   } else a->garray = 0;
25190ac07820SSatish Balay 
25200ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2521b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->lvec);
25220ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2523b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->Mvctx);
25247fff6886SHong Zhang 
25252e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2526b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->A);
25272e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2528b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->B);
2529b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
25300ac07820SSatish Balay   *newmat = mat;
25314beb1cfeSHong Zhang 
25323a40ed3dSBarry Smith   PetscFunctionReturn(0);
25330ac07820SSatish Balay }
253457b952d6SSatish Balay 
2535e090d566SSatish Balay #include "petscsys.h"
253657b952d6SSatish Balay 
25374a2ae208SSatish Balay #undef __FUNCT__
25384a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2539dfbe8321SBarry Smith PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
254057b952d6SSatish Balay {
254157b952d6SSatish Balay   Mat          A;
25426849ba73SBarry Smith   PetscErrorCode ierr;
25436849ba73SBarry Smith   int          i,nz,j,rstart,rend,fd;
254487828ca2SBarry Smith   PetscScalar  *vals,*buf;
254557b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
254657b952d6SSatish Balay   MPI_Status   status;
2547cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
254857b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2549f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
255057b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
255157b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
255257b952d6SSatish Balay 
2553d64ed03dSBarry Smith   PetscFunctionBegin;
2554b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
255557b952d6SSatish Balay 
2556d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
255857b952d6SSatish Balay   if (!rank) {
2559b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2560e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2561552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2562d64ed03dSBarry Smith     if (header[3] < 0) {
256329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2564d64ed03dSBarry Smith     }
25656c5fab8fSBarry Smith   }
2566d64ed03dSBarry Smith 
2567ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
256857b952d6SSatish Balay   M = header[1]; N = header[2];
256957b952d6SSatish Balay 
257029bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
257157b952d6SSatish Balay 
257257b952d6SSatish Balay   /*
257357b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
257457b952d6SSatish Balay      divisible by the blocksize
257557b952d6SSatish Balay   */
257657b952d6SSatish Balay   Mbs        = M/bs;
257757b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
257857b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
257957b952d6SSatish Balay   else                  Mbs++;
258057b952d6SSatish Balay   if (extra_rows &&!rank) {
2581b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
258257b952d6SSatish Balay   }
2583537820f0SBarry Smith 
258457b952d6SSatish Balay   /* determine ownership of all rows */
258557b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
258657b952d6SSatish Balay   m          = mbs*bs;
2587b0a32e0cSBarry Smith   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2588cee3aa6bSSatish Balay   browners   = rowners + size + 1;
2589ca161407SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
259057b952d6SSatish Balay   rowners[0] = 0;
2591cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2592cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
259357b952d6SSatish Balay   rstart = rowners[rank];
259457b952d6SSatish Balay   rend   = rowners[rank+1];
259557b952d6SSatish Balay 
259657b952d6SSatish Balay   /* distribute row lengths to all processors */
259782502324SSatish Balay   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
259857b952d6SSatish Balay   if (!rank) {
2599b0a32e0cSBarry Smith     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2600e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
260157b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
260282502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2603cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2604ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2605606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2606d64ed03dSBarry Smith   } else {
2607ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
260857b952d6SSatish Balay   }
260957b952d6SSatish Balay 
261057b952d6SSatish Balay   if (!rank) {
261157b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
261282502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2613549d3d68SSatish Balay     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
261457b952d6SSatish Balay     for (i=0; i<size; i++) {
261557b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
261657b952d6SSatish Balay         procsnz[i] += rowlengths[j];
261757b952d6SSatish Balay       }
261857b952d6SSatish Balay     }
2619606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
262057b952d6SSatish Balay 
262157b952d6SSatish Balay     /* determine max buffer needed and allocate it */
262257b952d6SSatish Balay     maxnz = 0;
262357b952d6SSatish Balay     for (i=0; i<size; i++) {
262457b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
262557b952d6SSatish Balay     }
262682502324SSatish Balay     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
262757b952d6SSatish Balay 
262857b952d6SSatish Balay     /* read in my part of the matrix column indices  */
262957b952d6SSatish Balay     nz     = procsnz[0];
263082502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
263157b952d6SSatish Balay     mycols = ibuf;
2632cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2633e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2634cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2635cee3aa6bSSatish Balay 
263657b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
263757b952d6SSatish Balay     for (i=1; i<size-1; i++) {
263857b952d6SSatish Balay       nz   = procsnz[i];
2639e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2640ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
264157b952d6SSatish Balay     }
264257b952d6SSatish Balay     /* read in the stuff for the last proc */
264357b952d6SSatish Balay     if (size != 1) {
264457b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2645e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
264657b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2647ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
264857b952d6SSatish Balay     }
2649606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2650d64ed03dSBarry Smith   } else {
265157b952d6SSatish Balay     /* determine buffer space needed for message */
265257b952d6SSatish Balay     nz = 0;
265357b952d6SSatish Balay     for (i=0; i<m; i++) {
265457b952d6SSatish Balay       nz += locrowlens[i];
265557b952d6SSatish Balay     }
265682502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
265757b952d6SSatish Balay     mycols = ibuf;
265857b952d6SSatish Balay     /* receive message of column indices*/
2659ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2660ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
266129bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
266257b952d6SSatish Balay   }
266357b952d6SSatish Balay 
266457b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
266582502324SSatish Balay   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2666cee3aa6bSSatish Balay   odlens   = dlens + (rend-rstart);
266782502324SSatish Balay   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2668549d3d68SSatish Balay   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
266957b952d6SSatish Balay   masked1  = mask    + Mbs;
267057b952d6SSatish Balay   masked2  = masked1 + Mbs;
267157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
267257b952d6SSatish Balay   for (i=0; i<mbs; i++) {
267357b952d6SSatish Balay     dcount  = 0;
267457b952d6SSatish Balay     odcount = 0;
267557b952d6SSatish Balay     for (j=0; j<bs; j++) {
267657b952d6SSatish Balay       kmax = locrowlens[rowcount];
267757b952d6SSatish Balay       for (k=0; k<kmax; k++) {
267857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
267957b952d6SSatish Balay         if (!mask[tmp]) {
268057b952d6SSatish Balay           mask[tmp] = 1;
268157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
268257b952d6SSatish Balay           else masked1[dcount++] = tmp;
268357b952d6SSatish Balay         }
268457b952d6SSatish Balay       }
268557b952d6SSatish Balay       rowcount++;
268657b952d6SSatish Balay     }
2687cee3aa6bSSatish Balay 
268857b952d6SSatish Balay     dlens[i]  = dcount;
268957b952d6SSatish Balay     odlens[i] = odcount;
2690cee3aa6bSSatish Balay 
269157b952d6SSatish Balay     /* zero out the mask elements we set */
269257b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
269357b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
269457b952d6SSatish Balay   }
2695cee3aa6bSSatish Balay 
269657b952d6SSatish Balay   /* create our matrix */
269778ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
269878ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
269978ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
270078ae41b4SKris Buschelman 
270178ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
27026d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
270357b952d6SSatish Balay 
270457b952d6SSatish Balay   if (!rank) {
270587828ca2SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
270657b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
270757b952d6SSatish Balay     nz = procsnz[0];
270857b952d6SSatish Balay     vals = buf;
2709cee3aa6bSSatish Balay     mycols = ibuf;
2710cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2711e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2712cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2713537820f0SBarry Smith 
271457b952d6SSatish Balay     /* insert into matrix */
271557b952d6SSatish Balay     jj      = rstart*bs;
271657b952d6SSatish Balay     for (i=0; i<m; i++) {
2717b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
271857b952d6SSatish Balay       mycols += locrowlens[i];
271957b952d6SSatish Balay       vals   += locrowlens[i];
272057b952d6SSatish Balay       jj++;
272157b952d6SSatish Balay     }
272257b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
272357b952d6SSatish Balay     for (i=1; i<size-1; i++) {
272457b952d6SSatish Balay       nz   = procsnz[i];
272557b952d6SSatish Balay       vals = buf;
2726e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2727ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
272857b952d6SSatish Balay     }
272957b952d6SSatish Balay     /* the last proc */
273057b952d6SSatish Balay     if (size != 1){
273157b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2732cee3aa6bSSatish Balay       vals = buf;
2733e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
273457b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2735ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
273657b952d6SSatish Balay     }
2737606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2738d64ed03dSBarry Smith   } else {
273957b952d6SSatish Balay     /* receive numeric values */
274087828ca2SBarry Smith     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
274157b952d6SSatish Balay 
274257b952d6SSatish Balay     /* receive message of values*/
274357b952d6SSatish Balay     vals   = buf;
2744cee3aa6bSSatish Balay     mycols = ibuf;
2745ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2746ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
274729bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
274857b952d6SSatish Balay 
274957b952d6SSatish Balay     /* insert into matrix */
275057b952d6SSatish Balay     jj      = rstart*bs;
2751cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2752b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
275357b952d6SSatish Balay       mycols += locrowlens[i];
275457b952d6SSatish Balay       vals   += locrowlens[i];
275557b952d6SSatish Balay       jj++;
275657b952d6SSatish Balay     }
275757b952d6SSatish Balay   }
2758606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2759606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2760606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2761606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2762606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2763606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
27646d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27656d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
276678ae41b4SKris Buschelman 
276778ae41b4SKris Buschelman   *newmat = A;
27683a40ed3dSBarry Smith   PetscFunctionReturn(0);
276957b952d6SSatish Balay }
277057b952d6SSatish Balay 
27714a2ae208SSatish Balay #undef __FUNCT__
27724a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2773133cdb44SSatish Balay /*@
2774133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2775133cdb44SSatish Balay 
2776133cdb44SSatish Balay    Input Parameters:
2777133cdb44SSatish Balay .  mat  - the matrix
2778133cdb44SSatish Balay .  fact - factor
2779133cdb44SSatish Balay 
2780fee21e36SBarry Smith    Collective on Mat
2781fee21e36SBarry Smith 
27828c890885SBarry Smith    Level: advanced
27838c890885SBarry Smith 
2784133cdb44SSatish Balay   Notes:
2785133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2786133cdb44SSatish Balay 
2787133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2788133cdb44SSatish Balay 
2789133cdb44SSatish Balay .seealso: MatSetOption()
2790133cdb44SSatish Balay @*/
2791dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2792133cdb44SSatish Balay {
2793dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscReal);
27945bf65638SKris Buschelman 
27955bf65638SKris Buschelman   PetscFunctionBegin;
27965bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
27975bf65638SKris Buschelman   if (f) {
27985bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
27995bf65638SKris Buschelman   }
28005bf65638SKris Buschelman   PetscFunctionReturn(0);
28015bf65638SKris Buschelman }
28025bf65638SKris Buschelman 
28035bf65638SKris Buschelman #undef __FUNCT__
28045bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2805dfbe8321SBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
28065bf65638SKris Buschelman {
280725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2808133cdb44SSatish Balay 
2809133cdb44SSatish Balay   PetscFunctionBegin;
2810133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2811133cdb44SSatish Balay   baij->ht_fact = fact;
2812133cdb44SSatish Balay   PetscFunctionReturn(0);
2813133cdb44SSatish Balay }
2814f2a5309cSSatish Balay 
28154a2ae208SSatish Balay #undef __FUNCT__
28164a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2817dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2818f2a5309cSSatish Balay {
2819f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2820f2a5309cSSatish Balay   PetscFunctionBegin;
2821f2a5309cSSatish Balay   *Ad     = a->A;
2822f2a5309cSSatish Balay   *Ao     = a->B;
2823195d93cdSBarry Smith   *colmap = a->garray;
2824f2a5309cSSatish Balay   PetscFunctionReturn(0);
2825f2a5309cSSatish Balay }
2826