xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 77e54ba9d2b3f34b049ed8d5734cc409f715452e)
1bba1ac68SSatish Balay /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat);
8268466fbSBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
9268466fbSBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
10f15d580aSBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
11f15d580aSBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
12f15d580aSBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
13268466fbSBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14268466fbSBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16268466fbSBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1793fea6afSBarry Smith 
1893fea6afSBarry Smith /*  UGLY, ugly, ugly
1987828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
2093fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2193fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2293fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2393fea6afSBarry Smith    into the single precision data structures.
2493fea6afSBarry Smith */
2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2634232ad1SSatish Balay EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2734232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2834232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2934232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
3034232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
3193fea6afSBarry Smith #else
326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3793fea6afSBarry Smith #endif
383b2fbd54SBarry Smith 
394a2ae208SSatish Balay #undef __FUNCT__
404a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
417843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
427843d17aSBarry Smith {
437843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
447843d17aSBarry Smith   int          ierr,i;
4587828ca2SBarry Smith   PetscScalar  *va,*vb;
467843d17aSBarry Smith   Vec          vtmp;
477843d17aSBarry Smith 
487843d17aSBarry Smith   PetscFunctionBegin;
497843d17aSBarry Smith 
507843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51273d9f13SBarry Smith   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
527843d17aSBarry Smith 
53ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
547843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55273d9f13SBarry Smith   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
567843d17aSBarry Smith 
57273d9f13SBarry Smith   for (i=0; i<A->m; i++){
58273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
597843d17aSBarry Smith   }
607843d17aSBarry Smith 
617843d17aSBarry Smith   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
627843d17aSBarry Smith   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
647843d17aSBarry Smith 
657843d17aSBarry Smith   PetscFunctionReturn(0);
667843d17aSBarry Smith }
677843d17aSBarry Smith 
687fc3c18eSBarry Smith EXTERN_C_BEGIN
694a2ae208SSatish Balay #undef __FUNCT__
704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
717fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
727fc3c18eSBarry Smith {
737fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
747fc3c18eSBarry Smith   int         ierr;
757fc3c18eSBarry Smith 
767fc3c18eSBarry Smith   PetscFunctionBegin;
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
787fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
797fc3c18eSBarry Smith   PetscFunctionReturn(0);
807fc3c18eSBarry Smith }
817fc3c18eSBarry Smith EXTERN_C_END
827fc3c18eSBarry Smith 
837fc3c18eSBarry Smith EXTERN_C_BEGIN
844a2ae208SSatish Balay #undef __FUNCT__
854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
867fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
877fc3c18eSBarry Smith {
887fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
897fc3c18eSBarry Smith   int         ierr;
907fc3c18eSBarry Smith 
917fc3c18eSBarry Smith   PetscFunctionBegin;
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
937fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
947fc3c18eSBarry Smith   PetscFunctionReturn(0);
957fc3c18eSBarry Smith }
967fc3c18eSBarry Smith EXTERN_C_END
977fc3c18eSBarry Smith 
98537820f0SBarry Smith /*
99537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10157b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10257b952d6SSatish Balay    length of colmap equals the global matrix length.
10357b952d6SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
10657b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
10757b952d6SSatish Balay {
10857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10957b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
11157b952d6SSatish Balay 
112d64ed03dSBarry Smith   PetscFunctionBegin;
113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
114f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11548e59246SSatish Balay   for (i=0; i<nbs; i++){
1160f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11748e59246SSatish Balay   }
11848e59246SSatish Balay #else
11982502324SSatish Balay   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
120b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12348e59246SSatish Balay #endif
1243a40ed3dSBarry Smith   PetscFunctionReturn(0);
12557b952d6SSatish Balay }
12657b952d6SSatish Balay 
12780c1aa95SSatish Balay #define CHUNKSIZE  10
12880c1aa95SSatish Balay 
129f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13080c1aa95SSatish Balay { \
13180c1aa95SSatish Balay  \
13280c1aa95SSatish Balay     brow = row/bs;  \
13380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13580c1aa95SSatish Balay       bcol = col/bs; \
13680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
137ab26458aSBarry Smith       low = 0; high = nrow; \
138ab26458aSBarry Smith       while (high-low > 3) { \
139ab26458aSBarry Smith         t = (low+high)/2; \
140ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
141ab26458aSBarry Smith         else              low  = t; \
142ab26458aSBarry Smith       } \
143ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
148eada6651SSatish Balay           else                    *bap  = value;  \
149ac7a638eSSatish Balay           goto a_noinsert; \
15080c1aa95SSatish Balay         } \
15180c1aa95SSatish Balay       } \
15289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
153a45adfd6SMatthew Knepley       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
15480c1aa95SSatish Balay       if (nrow >= rmax) { \
15580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15680c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1573eda8832SBarry Smith         MatScalar *new_a; \
15880c1aa95SSatish Balay  \
159a45adfd6SMatthew Knepley         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
16089280ab3SLois Curfman McInnes  \
16180c1aa95SSatish Balay         /* malloc new storage space */ \
1623eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
16382502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
16480c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16680c1aa95SSatish Balay  \
16780c1aa95SSatish Balay         /* copy over old data into new slots */ \
16880c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16980c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
17180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1723eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1733eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17487828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1763eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17780c1aa95SSatish Balay         /* free up old matrix storage */ \
178606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179606d414cSSatish Balay         if (!a->singlemalloc) { \
180606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182606d414cSSatish Balay         } \
18380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1847c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18580c1aa95SSatish Balay  \
18680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188b0a32e0cSBarry Smith         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19080c1aa95SSatish Balay         a->reallocs++; \
19180c1aa95SSatish Balay         a->nz++; \
19280c1aa95SSatish Balay       } \
19380c1aa95SSatish Balay       N = nrow++ - 1;  \
19480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19580c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1973eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19880c1aa95SSatish Balay       } \
1993eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
202ac7a638eSSatish Balay       a_noinsert:; \
20380c1aa95SSatish Balay     ailen[brow] = nrow; \
20480c1aa95SSatish Balay }
20557b952d6SSatish Balay 
206ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207ac7a638eSSatish Balay { \
208ac7a638eSSatish Balay     brow = row/bs;  \
209ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
211ac7a638eSSatish Balay       bcol = col/bs; \
212ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
213ac7a638eSSatish Balay       low = 0; high = nrow; \
214ac7a638eSSatish Balay       while (high-low > 3) { \
215ac7a638eSSatish Balay         t = (low+high)/2; \
216ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
217ac7a638eSSatish Balay         else              low  = t; \
218ac7a638eSSatish Balay       } \
219ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
220ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
221ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
222ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
224ac7a638eSSatish Balay           else                    *bap  = value;  \
225ac7a638eSSatish Balay           goto b_noinsert; \
226ac7a638eSSatish Balay         } \
227ac7a638eSSatish Balay       } \
22889280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
229a45adfd6SMatthew Knepley       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
230ac7a638eSSatish Balay       if (nrow >= rmax) { \
231ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23274c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2333eda8832SBarry Smith         MatScalar *new_a; \
234ac7a638eSSatish Balay  \
235a45adfd6SMatthew Knepley         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
23689280ab3SLois Curfman McInnes  \
237ac7a638eSSatish Balay         /* malloc new storage space */ \
2383eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
23982502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
241ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
242ac7a638eSSatish Balay  \
243ac7a638eSSatish Balay         /* copy over old data into new slots */ \
244ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24574c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2483eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2523eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253ac7a638eSSatish Balay         /* free up old matrix storage */ \
254606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255606d414cSSatish Balay         if (!b->singlemalloc) { \
256606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258606d414cSSatish Balay         } \
25974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2607c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
261ac7a638eSSatish Balay  \
262ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264b0a32e0cSBarry Smith         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26674c639caSSatish Balay         b->reallocs++; \
26774c639caSSatish Balay         b->nz++; \
268ac7a638eSSatish Balay       } \
269ac7a638eSSatish Balay       N = nrow++ - 1;  \
270ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
271ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
272ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2733eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274ac7a638eSSatish Balay       } \
2753eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
277ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
278ac7a638eSSatish Balay       b_noinsert:; \
279ac7a638eSSatish Balay     bilen[brow] = nrow; \
280ac7a638eSSatish Balay }
281ac7a638eSSatish Balay 
28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2834a2ae208SSatish Balay #undef __FUNCT__
2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
285f15d580aSBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
28657b952d6SSatish Balay {
2876fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
28893fea6afSBarry Smith   int         ierr,i,N = m*n;
2896fa18ffdSBarry Smith   MatScalar   *vsingle;
29093fea6afSBarry Smith 
29193fea6afSBarry Smith   PetscFunctionBegin;
2926fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2936fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29482502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2956fa18ffdSBarry Smith     b->setvalueslen  = N;
2966fa18ffdSBarry Smith   }
2976fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2986fa18ffdSBarry Smith 
29993fea6afSBarry Smith   for (i=0; i<N; i++) {
30093fea6afSBarry Smith     vsingle[i] = v[i];
30193fea6afSBarry Smith   }
30293fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30393fea6afSBarry Smith   PetscFunctionReturn(0);
30493fea6afSBarry Smith }
30593fea6afSBarry Smith 
3064a2ae208SSatish Balay #undef __FUNCT__
3074a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
308f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
30993fea6afSBarry Smith {
3106fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3116fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3126fa18ffdSBarry Smith   MatScalar   *vsingle;
31393fea6afSBarry Smith 
31493fea6afSBarry Smith   PetscFunctionBegin;
3156fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3166fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
31782502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3186fa18ffdSBarry Smith     b->setvalueslen  = N;
3196fa18ffdSBarry Smith   }
3206fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32193fea6afSBarry Smith   for (i=0; i<N; i++) {
32293fea6afSBarry Smith     vsingle[i] = v[i];
32393fea6afSBarry Smith   }
32493fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32593fea6afSBarry Smith   PetscFunctionReturn(0);
32693fea6afSBarry Smith }
3276fa18ffdSBarry Smith 
3284a2ae208SSatish Balay #undef __FUNCT__
3294a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
330f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3316fa18ffdSBarry Smith {
3326fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3336fa18ffdSBarry Smith   int         ierr,i,N = m*n;
3346fa18ffdSBarry Smith   MatScalar   *vsingle;
3356fa18ffdSBarry Smith 
3366fa18ffdSBarry Smith   PetscFunctionBegin;
3376fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3386fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
33982502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3406fa18ffdSBarry Smith     b->setvalueslen  = N;
3416fa18ffdSBarry Smith   }
3426fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3436fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3446fa18ffdSBarry Smith     vsingle[i] = v[i];
3456fa18ffdSBarry Smith   }
3466fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3476fa18ffdSBarry Smith   PetscFunctionReturn(0);
3486fa18ffdSBarry Smith }
3496fa18ffdSBarry Smith 
3504a2ae208SSatish Balay #undef __FUNCT__
3514a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
352f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3536fa18ffdSBarry Smith {
3546fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3556fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3566fa18ffdSBarry Smith   MatScalar   *vsingle;
3576fa18ffdSBarry Smith 
3586fa18ffdSBarry Smith   PetscFunctionBegin;
3596fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3606fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
36182502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3626fa18ffdSBarry Smith     b->setvalueslen  = N;
3636fa18ffdSBarry Smith   }
3646fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3656fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3666fa18ffdSBarry Smith     vsingle[i] = v[i];
3676fa18ffdSBarry Smith   }
3686fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3696fa18ffdSBarry Smith   PetscFunctionReturn(0);
3706fa18ffdSBarry Smith }
37193fea6afSBarry Smith #endif
37293fea6afSBarry Smith 
3734a2ae208SSatish Balay #undef __FUNCT__
374e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
375f15d580aSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
37693fea6afSBarry Smith {
37757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
37893fea6afSBarry Smith   MatScalar   value;
379273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
3804fa0d573SSatish Balay   int         ierr,i,j,row,col;
381273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
3824fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3834fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38457b952d6SSatish Balay 
385eada6651SSatish Balay   /* Some Variables required in the macro */
38680c1aa95SSatish Balay   Mat         A = baij->A;
38780c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3893eda8832SBarry Smith   MatScalar   *aa=a->a;
390ac7a638eSSatish Balay 
391ac7a638eSSatish Balay   Mat         B = baij->B;
392ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3943eda8832SBarry Smith   MatScalar   *ba=b->a;
395ac7a638eSSatish Balay 
396ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
397ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3983eda8832SBarry Smith   MatScalar   *ap,*bap;
39980c1aa95SSatish Balay 
400d64ed03dSBarry Smith   PetscFunctionBegin;
40157b952d6SSatish Balay   for (i=0; i<m; i++) {
4025ef9f2a5SBarry Smith     if (im[i] < 0) continue;
403aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
404590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
405639f9d9dSBarry Smith #endif
40657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
40757b952d6SSatish Balay       row = im[i] - rstart_orig;
40857b952d6SSatish Balay       for (j=0; j<n; j++) {
40957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
41057b952d6SSatish Balay           col = in[j] - cstart_orig;
41157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41380c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41473959e64SBarry Smith         } else if (in[j] < 0) continue;
415aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
416590ac198SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
417639f9d9dSBarry Smith #endif
41857b952d6SSatish Balay         else {
41957b952d6SSatish Balay           if (mat->was_assembled) {
420905e6a2fSBarry Smith             if (!baij->colmap) {
421905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
422905e6a2fSBarry Smith             }
423aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4240f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
425bba1ac68SSatish Balay             col  = col - 1;
42648e59246SSatish Balay #else
427bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
42848e59246SSatish Balay #endif
42957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4318295de27SSatish Balay               col =  in[j];
4329bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4339bf004c3SSatish Balay               B = baij->B;
4349bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4359bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4369bf004c3SSatish Balay               ba=b->a;
437bba1ac68SSatish Balay             } else col += in[j]%bs;
4388295de27SSatish Balay           } else col = in[j];
43957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
44090da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
44190da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44257b952d6SSatish Balay         }
44357b952d6SSatish Balay       }
444d64ed03dSBarry Smith     } else {
44590f02eecSBarry Smith       if (!baij->donotstash) {
446ff2fd236SBarry Smith         if (roworiented) {
4476fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448ff2fd236SBarry Smith         } else {
4496fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
45057b952d6SSatish Balay         }
45157b952d6SSatish Balay       }
45257b952d6SSatish Balay     }
45390f02eecSBarry Smith   }
4543a40ed3dSBarry Smith   PetscFunctionReturn(0);
45557b952d6SSatish Balay }
45657b952d6SSatish Balay 
4574a2ae208SSatish Balay #undef __FUNCT__
4584a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
459f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
460ab26458aSBarry Smith {
461ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
462f15d580aSBarry Smith   const MatScalar *value;
463f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
464273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
465273d9f13SBarry Smith   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
466ab26458aSBarry Smith   int             rend=baij->rend,cstart=baij->cstart,stepval;
467ab26458aSBarry Smith   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468ab26458aSBarry Smith 
469b16ae2b1SBarry Smith   PetscFunctionBegin;
47030793edcSSatish Balay   if(!barray) {
47182502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47282502324SSatish Balay     baij->barray = barray;
47330793edcSSatish Balay   }
47430793edcSSatish Balay 
475ab26458aSBarry Smith   if (roworiented) {
476ab26458aSBarry Smith     stepval = (n-1)*bs;
477ab26458aSBarry Smith   } else {
478ab26458aSBarry Smith     stepval = (m-1)*bs;
479ab26458aSBarry Smith   }
480ab26458aSBarry Smith   for (i=0; i<m; i++) {
4815ef9f2a5SBarry Smith     if (im[i] < 0) continue;
482aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
483590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
484ab26458aSBarry Smith #endif
485ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
486ab26458aSBarry Smith       row = im[i] - rstart;
487ab26458aSBarry Smith       for (j=0; j<n; j++) {
48815b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
48915b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
490f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
49115b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
492f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
49315b57d14SSatish Balay         } else { /* Here a copy is required */
494ab26458aSBarry Smith           if (roworiented) {
495ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
496ab26458aSBarry Smith           } else {
497ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
498abef11f7SSatish Balay           }
49947513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
50047513183SBarry Smith             for (jj=0; jj<bs; jj++) {
50130793edcSSatish Balay               *barray++  = *value++;
50247513183SBarry Smith             }
50347513183SBarry Smith           }
50430793edcSSatish Balay           barray -=bs2;
50515b57d14SSatish Balay         }
506abef11f7SSatish Balay 
507abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
508abef11f7SSatish Balay           col  = in[j] - cstart;
50993fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
510ab26458aSBarry Smith         }
5115ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
512aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
513590ac198SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
514ab26458aSBarry Smith #endif
515ab26458aSBarry Smith         else {
516ab26458aSBarry Smith           if (mat->was_assembled) {
517ab26458aSBarry Smith             if (!baij->colmap) {
518ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
519ab26458aSBarry Smith             }
520a5eb4965SSatish Balay 
521aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
522aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
523fa46199cSSatish Balay             { int data;
5240f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
52529bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
526fa46199cSSatish Balay             }
52748e59246SSatish Balay #else
52829bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
529a5eb4965SSatish Balay #endif
53048e59246SSatish Balay #endif
531aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5320f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
533fa46199cSSatish Balay             col  = (col - 1)/bs;
53448e59246SSatish Balay #else
535a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
53648e59246SSatish Balay #endif
537ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
538ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
539ab26458aSBarry Smith               col =  in[j];
540ab26458aSBarry Smith             }
541ab26458aSBarry Smith           }
542ab26458aSBarry Smith           else col = in[j];
54393fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
544ab26458aSBarry Smith         }
545ab26458aSBarry Smith       }
546d64ed03dSBarry Smith     } else {
547ab26458aSBarry Smith       if (!baij->donotstash) {
548ff2fd236SBarry Smith         if (roworiented) {
5496fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550ff2fd236SBarry Smith         } else {
5516fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
552ff2fd236SBarry Smith         }
553abef11f7SSatish Balay       }
554ab26458aSBarry Smith     }
555ab26458aSBarry Smith   }
5563a40ed3dSBarry Smith   PetscFunctionReturn(0);
557ab26458aSBarry Smith }
5586fa18ffdSBarry Smith 
5590bdbc534SSatish Balay #define HASH_KEY 0.6180339887
560c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5616fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5634a2ae208SSatish Balay #undef __FUNCT__
5644a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
565f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
5660bdbc534SSatish Balay {
5670bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
568273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
5690bdbc534SSatish Balay   int         ierr,i,j,row,col;
570273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
571c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
572c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
573329f5518SBarry Smith   PetscReal   tmp;
5743eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
575aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5764a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5774a15367fSSatish Balay #endif
5780bdbc534SSatish Balay 
5790bdbc534SSatish Balay   PetscFunctionBegin;
5800bdbc534SSatish Balay 
5810bdbc534SSatish Balay   for (i=0; i<m; i++) {
582aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
58329bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
584590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
5850bdbc534SSatish Balay #endif
5860bdbc534SSatish Balay       row = im[i];
587c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5880bdbc534SSatish Balay       for (j=0; j<n; j++) {
5890bdbc534SSatish Balay         col = in[j];
5906fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5910bdbc534SSatish Balay         /* Look up into the Hash Table */
592c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
593c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5940bdbc534SSatish Balay 
595c2760754SSatish Balay 
596c2760754SSatish Balay         idx = h1;
597aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
598187ce0cbSSatish Balay         insert_ct++;
599187ce0cbSSatish Balay         total_ct++;
600187ce0cbSSatish Balay         if (HT[idx] != key) {
601187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
602187ce0cbSSatish Balay           if (idx == size) {
603187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
604187ce0cbSSatish Balay             if (idx == h1) {
605a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
606187ce0cbSSatish Balay             }
607187ce0cbSSatish Balay           }
608187ce0cbSSatish Balay         }
609187ce0cbSSatish Balay #else
610c2760754SSatish Balay         if (HT[idx] != key) {
611c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
612c2760754SSatish Balay           if (idx == size) {
613c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
614c2760754SSatish Balay             if (idx == h1) {
615a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
616c2760754SSatish Balay             }
617c2760754SSatish Balay           }
618c2760754SSatish Balay         }
619187ce0cbSSatish Balay #endif
620c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
621c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
622c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6230bdbc534SSatish Balay       }
6240bdbc534SSatish Balay     } else {
6250bdbc534SSatish Balay       if (!baij->donotstash) {
626ff2fd236SBarry Smith         if (roworiented) {
6278798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
628ff2fd236SBarry Smith         } else {
6298798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6300bdbc534SSatish Balay         }
6310bdbc534SSatish Balay       }
6320bdbc534SSatish Balay     }
6330bdbc534SSatish Balay   }
634aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
635187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
636187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
637187ce0cbSSatish Balay #endif
6380bdbc534SSatish Balay   PetscFunctionReturn(0);
6390bdbc534SSatish Balay }
6400bdbc534SSatish Balay 
6414a2ae208SSatish Balay #undef __FUNCT__
6424a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
643f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
6440bdbc534SSatish Balay {
6450bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
646273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
6478798bf22SSatish Balay   int             ierr,i,j,ii,jj,row,col;
648273d9f13SBarry Smith   int             rstart=baij->rstart ;
649b4cc0f5aSSatish Balay   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
650c2760754SSatish Balay   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
651329f5518SBarry Smith   PetscReal       tmp;
6523eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
653f15d580aSBarry Smith   const MatScalar *v_t,*value;
654aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6554a15367fSSatish Balay   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6564a15367fSSatish Balay #endif
6570bdbc534SSatish Balay 
658d0a41580SSatish Balay   PetscFunctionBegin;
659d0a41580SSatish Balay 
6600bdbc534SSatish Balay   if (roworiented) {
6610bdbc534SSatish Balay     stepval = (n-1)*bs;
6620bdbc534SSatish Balay   } else {
6630bdbc534SSatish Balay     stepval = (m-1)*bs;
6640bdbc534SSatish Balay   }
6650bdbc534SSatish Balay   for (i=0; i<m; i++) {
666aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
667590ac198SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
668590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
6690bdbc534SSatish Balay #endif
6700bdbc534SSatish Balay     row   = im[i];
671187ce0cbSSatish Balay     v_t   = v + i*bs2;
672c2760754SSatish Balay     if (row >= rstart && row < rend) {
6730bdbc534SSatish Balay       for (j=0; j<n; j++) {
6740bdbc534SSatish Balay         col = in[j];
6750bdbc534SSatish Balay 
6760bdbc534SSatish Balay         /* Look up into the Hash Table */
677c2760754SSatish Balay         key = row*Nbs+col+1;
678c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6790bdbc534SSatish Balay 
680c2760754SSatish Balay         idx = h1;
681aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
682187ce0cbSSatish Balay         total_ct++;
683187ce0cbSSatish Balay         insert_ct++;
684187ce0cbSSatish Balay        if (HT[idx] != key) {
685187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
686187ce0cbSSatish Balay           if (idx == size) {
687187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
688187ce0cbSSatish Balay             if (idx == h1) {
689a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
690187ce0cbSSatish Balay             }
691187ce0cbSSatish Balay           }
692187ce0cbSSatish Balay         }
693187ce0cbSSatish Balay #else
694c2760754SSatish Balay         if (HT[idx] != key) {
695c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
696c2760754SSatish Balay           if (idx == size) {
697c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
698c2760754SSatish Balay             if (idx == h1) {
699a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
700c2760754SSatish Balay             }
701c2760754SSatish Balay           }
702c2760754SSatish Balay         }
703187ce0cbSSatish Balay #endif
704c2760754SSatish Balay         baij_a = HD[idx];
7050bdbc534SSatish Balay         if (roworiented) {
706c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
707187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
708187ce0cbSSatish Balay           value = v_t;
709187ce0cbSSatish Balay           v_t  += bs;
710fef45726SSatish Balay           if (addv == ADD_VALUES) {
711c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
712c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
713fef45726SSatish Balay                 baij_a[jj]  += *value++;
714b4cc0f5aSSatish Balay               }
715b4cc0f5aSSatish Balay             }
716fef45726SSatish Balay           } else {
717c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
718c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
719fef45726SSatish Balay                 baij_a[jj]  = *value++;
720fef45726SSatish Balay               }
721fef45726SSatish Balay             }
722fef45726SSatish Balay           }
7230bdbc534SSatish Balay         } else {
7240bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
725fef45726SSatish Balay           if (addv == ADD_VALUES) {
726b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7270bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
728fef45726SSatish Balay                 baij_a[jj]  += *value++;
729fef45726SSatish Balay               }
730fef45726SSatish Balay             }
731fef45726SSatish Balay           } else {
732fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
733fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
734fef45726SSatish Balay                 baij_a[jj]  = *value++;
735fef45726SSatish Balay               }
736b4cc0f5aSSatish Balay             }
7370bdbc534SSatish Balay           }
7380bdbc534SSatish Balay         }
7390bdbc534SSatish Balay       }
7400bdbc534SSatish Balay     } else {
7410bdbc534SSatish Balay       if (!baij->donotstash) {
7420bdbc534SSatish Balay         if (roworiented) {
7438798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7440bdbc534SSatish Balay         } else {
7458798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7460bdbc534SSatish Balay         }
7470bdbc534SSatish Balay       }
7480bdbc534SSatish Balay     }
7490bdbc534SSatish Balay   }
750aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
751187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
752187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
753187ce0cbSSatish Balay #endif
7540bdbc534SSatish Balay   PetscFunctionReturn(0);
7550bdbc534SSatish Balay }
756133cdb44SSatish Balay 
7574a2ae208SSatish Balay #undef __FUNCT__
7584a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
759f15d580aSBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
760d6de1c52SSatish Balay {
761d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
762d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
76348e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
764d6de1c52SSatish Balay 
765133cdb44SSatish Balay   PetscFunctionBegin;
766d6de1c52SSatish Balay   for (i=0; i<m; i++) {
767590ac198SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
768590ac198SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
769d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
770d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
771d6de1c52SSatish Balay       for (j=0; j<n; j++) {
772590ac198SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
773590ac198SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
774d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
775d6de1c52SSatish Balay           col = idxn[j] - bscstart;
77698dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
777d64ed03dSBarry Smith         } else {
778905e6a2fSBarry Smith           if (!baij->colmap) {
779905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
780905e6a2fSBarry Smith           }
781aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7820f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
783fa46199cSSatish Balay           data --;
78448e59246SSatish Balay #else
78548e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
78648e59246SSatish Balay #endif
78748e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
788d9d09a02SSatish Balay           else {
78948e59246SSatish Balay             col  = data + idxn[j]%bs;
79098dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791d6de1c52SSatish Balay           }
792d6de1c52SSatish Balay         }
793d6de1c52SSatish Balay       }
794d64ed03dSBarry Smith     } else {
79529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
796d6de1c52SSatish Balay     }
797d6de1c52SSatish Balay   }
7983a40ed3dSBarry Smith  PetscFunctionReturn(0);
799d6de1c52SSatish Balay }
800d6de1c52SSatish Balay 
8014a2ae208SSatish Balay #undef __FUNCT__
8024a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
803064f8208SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
804d6de1c52SSatish Balay {
805d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
806d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
807acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
808329f5518SBarry Smith   PetscReal  sum = 0.0;
8093eda8832SBarry Smith   MatScalar  *v;
810d6de1c52SSatish Balay 
811d64ed03dSBarry Smith   PetscFunctionBegin;
812d6de1c52SSatish Balay   if (baij->size == 1) {
813064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
814d6de1c52SSatish Balay   } else {
815d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
816d6de1c52SSatish Balay       v = amat->a;
817d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
818aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
819329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
820d6de1c52SSatish Balay #else
821d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
822d6de1c52SSatish Balay #endif
823d6de1c52SSatish Balay       }
824d6de1c52SSatish Balay       v = bmat->a;
825d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
826aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
827329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
828d6de1c52SSatish Balay #else
829d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
830d6de1c52SSatish Balay #endif
831d6de1c52SSatish Balay       }
832064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
833064f8208SBarry Smith       *nrm = sqrt(*nrm);
834d64ed03dSBarry Smith     } else {
83529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836d6de1c52SSatish Balay     }
837d64ed03dSBarry Smith   }
8383a40ed3dSBarry Smith   PetscFunctionReturn(0);
839d6de1c52SSatish Balay }
84057b952d6SSatish Balay 
841bd7f49f5SSatish Balay 
842fef45726SSatish Balay /*
843fef45726SSatish Balay   Creates the hash table, and sets the table
844fef45726SSatish Balay   This table is created only once.
845fef45726SSatish Balay   If new entried need to be added to the matrix
846fef45726SSatish Balay   then the hash table has to be destroyed and
847fef45726SSatish Balay   recreated.
848fef45726SSatish Balay */
8494a2ae208SSatish Balay #undef __FUNCT__
8504a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
851329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
852596b8d2eSBarry Smith {
853596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
854596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
855596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8560bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
857549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
858187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859fef45726SSatish Balay   int         *HT,key;
8603eda8832SBarry Smith   MatScalar   **HD;
861329f5518SBarry Smith   PetscReal   tmp;
862aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8634a15367fSSatish Balay   int         ct=0,max=0;
8644a15367fSSatish Balay #endif
865fef45726SSatish Balay 
866d64ed03dSBarry Smith   PetscFunctionBegin;
8670bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8680bdbc534SSatish Balay   size = baij->ht_size;
869fef45726SSatish Balay 
8700bdbc534SSatish Balay   if (baij->ht) {
8710bdbc534SSatish Balay     PetscFunctionReturn(0);
872596b8d2eSBarry Smith   }
8730bdbc534SSatish Balay 
874fef45726SSatish Balay   /* Allocate Memory for Hash Table */
875b0a32e0cSBarry Smith   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
877b9e4cc15SSatish Balay   HD       = baij->hd;
878a07cd24cSSatish Balay   HT       = baij->ht;
879b9e4cc15SSatish Balay 
880b9e4cc15SSatish Balay 
88187828ca2SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8820bdbc534SSatish Balay 
883596b8d2eSBarry Smith 
884596b8d2eSBarry Smith   /* Loop Over A */
8850bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
886596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8870bdbc534SSatish Balay       row = i+rstart;
8880bdbc534SSatish Balay       col = aj[j]+cstart;
889596b8d2eSBarry Smith 
890187ce0cbSSatish Balay       key = row*Nbs + col + 1;
891c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8920bdbc534SSatish Balay       for (k=0; k<size; k++){
8930bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8940bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8950bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
896596b8d2eSBarry Smith           break;
897aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
898187ce0cbSSatish Balay         } else {
899187ce0cbSSatish Balay           ct++;
900187ce0cbSSatish Balay #endif
901596b8d2eSBarry Smith         }
902187ce0cbSSatish Balay       }
903aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
904187ce0cbSSatish Balay       if (k> max) max = k;
905187ce0cbSSatish Balay #endif
906596b8d2eSBarry Smith     }
907596b8d2eSBarry Smith   }
908596b8d2eSBarry Smith   /* Loop Over B */
9090bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
910596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9110bdbc534SSatish Balay       row = i+rstart;
9120bdbc534SSatish Balay       col = garray[bj[j]];
913187ce0cbSSatish Balay       key = row*Nbs + col + 1;
914c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9150bdbc534SSatish Balay       for (k=0; k<size; k++){
9160bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
9170bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9180bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
919596b8d2eSBarry Smith           break;
920aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
921187ce0cbSSatish Balay         } else {
922187ce0cbSSatish Balay           ct++;
923187ce0cbSSatish Balay #endif
924596b8d2eSBarry Smith         }
925187ce0cbSSatish Balay       }
926aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
927187ce0cbSSatish Balay       if (k> max) max = k;
928187ce0cbSSatish Balay #endif
929596b8d2eSBarry Smith     }
930596b8d2eSBarry Smith   }
931596b8d2eSBarry Smith 
932596b8d2eSBarry Smith   /* Print Summary */
933aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
934c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
935596b8d2eSBarry Smith     if (HT[i]) {j++;}
936c38d4ed2SBarry Smith   }
937b0a32e0cSBarry Smith   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
938187ce0cbSSatish Balay #endif
9393a40ed3dSBarry Smith   PetscFunctionReturn(0);
940596b8d2eSBarry Smith }
94157b952d6SSatish Balay 
9424a2ae208SSatish Balay #undef __FUNCT__
9434a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945bbb85fb3SSatish Balay {
946bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
947ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
948bbb85fb3SSatish Balay   InsertMode  addv;
949bbb85fb3SSatish Balay 
950bbb85fb3SSatish Balay   PetscFunctionBegin;
951bbb85fb3SSatish Balay   if (baij->donotstash) {
952bbb85fb3SSatish Balay     PetscFunctionReturn(0);
953bbb85fb3SSatish Balay   }
954bbb85fb3SSatish Balay 
955bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
956bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
957bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
959bbb85fb3SSatish Balay   }
960bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
961bbb85fb3SSatish Balay 
9628798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9638798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9648798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
965b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
96646680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
967b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
968bbb85fb3SSatish Balay   PetscFunctionReturn(0);
969bbb85fb3SSatish Balay }
970bbb85fb3SSatish Balay 
9714a2ae208SSatish Balay #undef __FUNCT__
9724a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
973bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974bbb85fb3SSatish Balay {
975bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
976a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
977a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9787c922b88SBarry Smith   int         *row,*col,other_disassembled;
9797c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9803eda8832SBarry Smith   MatScalar   *val;
981bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
982bbb85fb3SSatish Balay 
983bbb85fb3SSatish Balay   PetscFunctionBegin;
984bbb85fb3SSatish Balay   if (!baij->donotstash) {
985a2d1c673SSatish Balay     while (1) {
9868798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
987a2d1c673SSatish Balay       if (!flg) break;
988a2d1c673SSatish Balay 
989bbb85fb3SSatish Balay       for (i=0; i<n;) {
990bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
991bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
992bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
993bbb85fb3SSatish Balay         else       ncols = n-i;
994bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99593fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
996bbb85fb3SSatish Balay         i = j;
997bbb85fb3SSatish Balay       }
998bbb85fb3SSatish Balay     }
9998798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1000a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1001a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1002a2d1c673SSatish Balay        restore the original flags */
1003a2d1c673SSatish Balay     r1 = baij->roworiented;
1004a2d1c673SSatish Balay     r2 = a->roworiented;
1005a2d1c673SSatish Balay     r3 = b->roworiented;
10067c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10077c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10087c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1009a2d1c673SSatish Balay     while (1) {
10108798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1011a2d1c673SSatish Balay       if (!flg) break;
1012a2d1c673SSatish Balay 
1013a2d1c673SSatish Balay       for (i=0; i<n;) {
1014a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1015a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1016a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1017a2d1c673SSatish Balay         else       ncols = n-i;
101893fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1019a2d1c673SSatish Balay         i = j;
1020a2d1c673SSatish Balay       }
1021a2d1c673SSatish Balay     }
10228798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1023a2d1c673SSatish Balay     baij->roworiented = r1;
1024a2d1c673SSatish Balay     a->roworiented    = r2;
1025a2d1c673SSatish Balay     b->roworiented    = r3;
1026bbb85fb3SSatish Balay   }
1027bbb85fb3SSatish Balay 
1028bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1029bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1030bbb85fb3SSatish Balay 
1031bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1032bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1033bbb85fb3SSatish Balay   /*
1034bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1035bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1036bbb85fb3SSatish Balay   */
1037bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1038bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1039bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1040bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1041bbb85fb3SSatish Balay     }
1042bbb85fb3SSatish Balay   }
1043bbb85fb3SSatish Balay 
1044bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1045bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1046bbb85fb3SSatish Balay   }
1047bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1048bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1049bbb85fb3SSatish Balay 
1050aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1051bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1052f6275e2eSBarry Smith     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1053bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1054bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1055bbb85fb3SSatish Balay   }
1056bbb85fb3SSatish Balay #endif
1057bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1058bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1059bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1060bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1061bbb85fb3SSatish Balay   }
1062bbb85fb3SSatish Balay 
1063606d414cSSatish Balay   if (baij->rowvalues) {
1064606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1065606d414cSSatish Balay     baij->rowvalues = 0;
1066606d414cSSatish Balay   }
1067bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1068bbb85fb3SSatish Balay }
106957b952d6SSatish Balay 
10704a2ae208SSatish Balay #undef __FUNCT__
10714a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1072b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107357b952d6SSatish Balay {
107457b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1075fb9695e5SSatish Balay   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
10766831982aSBarry Smith   PetscTruth        isascii,isdraw;
1077b0a32e0cSBarry Smith   PetscViewer       sviewer;
1078f3ef73ceSBarry Smith   PetscViewerFormat format;
107957b952d6SSatish Balay 
1080d64ed03dSBarry Smith   PetscFunctionBegin;
1081f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1082b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1083fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
10840f5bd95cSBarry Smith   if (isascii) {
1085b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1086456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
10874e220ebcSLois Curfman McInnes       MatInfo info;
1088d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1089d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1090b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1091273d9f13SBarry Smith               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10926831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1093d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1094b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1095d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1096b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1097b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
109857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10993a40ed3dSBarry Smith       PetscFunctionReturn(0);
1100fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1101b0a32e0cSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
11023a40ed3dSBarry Smith       PetscFunctionReturn(0);
110304929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
110404929863SHong Zhang       PetscFunctionReturn(0);
110557b952d6SSatish Balay     }
110657b952d6SSatish Balay   }
110757b952d6SSatish Balay 
11080f5bd95cSBarry Smith   if (isdraw) {
1109b0a32e0cSBarry Smith     PetscDraw       draw;
111057b952d6SSatish Balay     PetscTruth isnull;
1111b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1112b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
111357b952d6SSatish Balay   }
111457b952d6SSatish Balay 
111557b952d6SSatish Balay   if (size == 1) {
1116873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
111757b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1118d64ed03dSBarry Smith   } else {
111957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
112057b952d6SSatish Balay     Mat         A;
112157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1122273d9f13SBarry Smith     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11233eda8832SBarry Smith     MatScalar   *a;
112457b952d6SSatish Balay 
112557b952d6SSatish Balay     if (!rank) {
112655843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1127d64ed03dSBarry Smith     } else {
112855843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
112957b952d6SSatish Balay     }
1130b0a32e0cSBarry Smith     PetscLogObjectParent(mat,A);
113157b952d6SSatish Balay 
113257b952d6SSatish Balay     /* copy over the A part */
113357b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
113457b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
113582502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
113657b952d6SSatish Balay 
113757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
113857b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
113957b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
114057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
114157b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
114257b952d6SSatish Balay         for (k=0; k<bs; k++) {
114393fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1144cee3aa6bSSatish Balay           col++; a += bs;
114557b952d6SSatish Balay         }
114657b952d6SSatish Balay       }
114757b952d6SSatish Balay     }
114857b952d6SSatish Balay     /* copy over the B part */
114957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
115057b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
115157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
115257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
115357b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
115557b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
115657b952d6SSatish Balay         for (k=0; k<bs; k++) {
115793fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1158cee3aa6bSSatish Balay           col++; a += bs;
115957b952d6SSatish Balay         }
116057b952d6SSatish Balay       }
116157b952d6SSatish Balay     }
1162606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11636d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11646d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
116555843e3eSBarry Smith     /*
116655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1167b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
116855843e3eSBarry Smith     */
1169b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1170f14a1c24SBarry Smith     if (!rank) {
1171e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
11726831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
117357b952d6SSatish Balay     }
1174b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
117557b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
117657b952d6SSatish Balay   }
11773a40ed3dSBarry Smith   PetscFunctionReturn(0);
117857b952d6SSatish Balay }
117957b952d6SSatish Balay 
11804a2ae208SSatish Balay #undef __FUNCT__
11814a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1182b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
118357b952d6SSatish Balay {
118457b952d6SSatish Balay   int        ierr;
11856831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
118657b952d6SSatish Balay 
1187d64ed03dSBarry Smith   PetscFunctionBegin;
1188b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1189fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1190b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1191fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
11920f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11937b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11945cd90555SBarry Smith   } else {
119529bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
119657b952d6SSatish Balay   }
11973a40ed3dSBarry Smith   PetscFunctionReturn(0);
119857b952d6SSatish Balay }
119957b952d6SSatish Balay 
12004a2ae208SSatish Balay #undef __FUNCT__
12014a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1202e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
120379bdfe76SSatish Balay {
120479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
120579bdfe76SSatish Balay   int         ierr;
120679bdfe76SSatish Balay 
1207d64ed03dSBarry Smith   PetscFunctionBegin;
1208aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1209b0a32e0cSBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
121079bdfe76SSatish Balay #endif
12118798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12128798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1213606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
121479bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
121579bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1216aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12170f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
121848e59246SSatish Balay #else
1219606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
122048e59246SSatish Balay #endif
1221606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1222606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1223606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1224606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1225606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1226606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12276fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12286fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12296fa18ffdSBarry Smith #endif
1230606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
12313a40ed3dSBarry Smith   PetscFunctionReturn(0);
123279bdfe76SSatish Balay }
123379bdfe76SSatish Balay 
12344a2ae208SSatish Balay #undef __FUNCT__
12354a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1236ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1237cee3aa6bSSatish Balay {
1238cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
123947b4a8eaSLois Curfman McInnes   int         ierr,nt;
1240cee3aa6bSSatish Balay 
1241d64ed03dSBarry Smith   PetscFunctionBegin;
1242e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1243273d9f13SBarry Smith   if (nt != A->n) {
124429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
124547b4a8eaSLois Curfman McInnes   }
1246e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1247273d9f13SBarry Smith   if (nt != A->m) {
124829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
124947b4a8eaSLois Curfman McInnes   }
125043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1251f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
125243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1253f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
125443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12553a40ed3dSBarry Smith   PetscFunctionReturn(0);
1256cee3aa6bSSatish Balay }
1257cee3aa6bSSatish Balay 
12584a2ae208SSatish Balay #undef __FUNCT__
12594a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1260ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261cee3aa6bSSatish Balay {
1262cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1263cee3aa6bSSatish Balay   int        ierr;
1264d64ed03dSBarry Smith 
1265d64ed03dSBarry Smith   PetscFunctionBegin;
126643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1267f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
126843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1269f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1271cee3aa6bSSatish Balay }
1272cee3aa6bSSatish Balay 
12734a2ae208SSatish Balay #undef __FUNCT__
12744a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
12757c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1276cee3aa6bSSatish Balay {
1277cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1278cee3aa6bSSatish Balay   int         ierr;
1279cee3aa6bSSatish Balay 
1280d64ed03dSBarry Smith   PetscFunctionBegin;
1281cee3aa6bSSatish Balay   /* do nondiagonal part */
12827c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1283cee3aa6bSSatish Balay   /* send it on its way */
1284537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1285cee3aa6bSSatish Balay   /* do local part */
12867c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1287cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1288cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1289cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1290639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12913a40ed3dSBarry Smith   PetscFunctionReturn(0);
1292cee3aa6bSSatish Balay }
1293cee3aa6bSSatish Balay 
12944a2ae208SSatish Balay #undef __FUNCT__
12954a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
12967c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1297cee3aa6bSSatish Balay {
1298cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1299cee3aa6bSSatish Balay   int         ierr;
1300cee3aa6bSSatish Balay 
1301d64ed03dSBarry Smith   PetscFunctionBegin;
1302cee3aa6bSSatish Balay   /* do nondiagonal part */
13037c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1304cee3aa6bSSatish Balay   /* send it on its way */
1305537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1306cee3aa6bSSatish Balay   /* do local part */
13077c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1308cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1309cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1310cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1311537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13123a40ed3dSBarry Smith   PetscFunctionReturn(0);
1313cee3aa6bSSatish Balay }
1314cee3aa6bSSatish Balay 
1315cee3aa6bSSatish Balay /*
1316cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1317cee3aa6bSSatish Balay    diagonal block
1318cee3aa6bSSatish Balay */
13194a2ae208SSatish Balay #undef __FUNCT__
13204a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1321ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1322cee3aa6bSSatish Balay {
1323cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13243a40ed3dSBarry Smith   int         ierr;
1325d64ed03dSBarry Smith 
1326d64ed03dSBarry Smith   PetscFunctionBegin;
1327273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13283a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13293a40ed3dSBarry Smith   PetscFunctionReturn(0);
1330cee3aa6bSSatish Balay }
1331cee3aa6bSSatish Balay 
13324a2ae208SSatish Balay #undef __FUNCT__
13334a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1334268466fbSBarry Smith int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1335cee3aa6bSSatish Balay {
1336cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1337cee3aa6bSSatish Balay   int         ierr;
1338d64ed03dSBarry Smith 
1339d64ed03dSBarry Smith   PetscFunctionBegin;
1340cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1341cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13423a40ed3dSBarry Smith   PetscFunctionReturn(0);
1343cee3aa6bSSatish Balay }
1344026e39d0SSatish Balay 
13454a2ae208SSatish Balay #undef __FUNCT__
13464a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
134787828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1348acdf5bf4SSatish Balay {
1349acdf5bf4SSatish Balay   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
135087828ca2SBarry Smith   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1351acdf5bf4SSatish Balay   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1352d9d09a02SSatish Balay   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1353d9d09a02SSatish Balay   int          *cmap,*idx_p,cstart = mat->cstart;
1354acdf5bf4SSatish Balay 
1355d64ed03dSBarry Smith   PetscFunctionBegin;
135629bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1357acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1358acdf5bf4SSatish Balay 
1359acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1360acdf5bf4SSatish Balay     /*
1361acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1362acdf5bf4SSatish Balay     */
1363acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1364bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1365bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1366acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1367acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1368acdf5bf4SSatish Balay     }
136987828ca2SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1370acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1371acdf5bf4SSatish Balay   }
1372acdf5bf4SSatish Balay 
137329bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1374d9d09a02SSatish Balay   lrow = row - brstart;
1375acdf5bf4SSatish Balay 
1376acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1377acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1378acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1379f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1380f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1381acdf5bf4SSatish Balay   nztot = nzA + nzB;
1382acdf5bf4SSatish Balay 
1383acdf5bf4SSatish Balay   cmap  = mat->garray;
1384acdf5bf4SSatish Balay   if (v  || idx) {
1385acdf5bf4SSatish Balay     if (nztot) {
1386acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1387acdf5bf4SSatish Balay       int imark = -1;
1388acdf5bf4SSatish Balay       if (v) {
1389acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1390acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1391d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1392acdf5bf4SSatish Balay           else break;
1393acdf5bf4SSatish Balay         }
1394acdf5bf4SSatish Balay         imark = i;
1395acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1396acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1397acdf5bf4SSatish Balay       }
1398acdf5bf4SSatish Balay       if (idx) {
1399acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1400acdf5bf4SSatish Balay         if (imark > -1) {
1401acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1402bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1403acdf5bf4SSatish Balay           }
1404acdf5bf4SSatish Balay         } else {
1405acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1406d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1407d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1408acdf5bf4SSatish Balay             else break;
1409acdf5bf4SSatish Balay           }
1410acdf5bf4SSatish Balay           imark = i;
1411acdf5bf4SSatish Balay         }
1412d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1413d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1414acdf5bf4SSatish Balay       }
1415d64ed03dSBarry Smith     } else {
1416d212a18eSSatish Balay       if (idx) *idx = 0;
1417d212a18eSSatish Balay       if (v)   *v   = 0;
1418d212a18eSSatish Balay     }
1419acdf5bf4SSatish Balay   }
1420acdf5bf4SSatish Balay   *nz = nztot;
1421f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1422f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1424acdf5bf4SSatish Balay }
1425acdf5bf4SSatish Balay 
14264a2ae208SSatish Balay #undef __FUNCT__
14274a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
142887828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1429acdf5bf4SSatish Balay {
1430acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1431d64ed03dSBarry Smith 
1432d64ed03dSBarry Smith   PetscFunctionBegin;
1433acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
143429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1435acdf5bf4SSatish Balay   }
1436acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14373a40ed3dSBarry Smith   PetscFunctionReturn(0);
1438acdf5bf4SSatish Balay }
1439acdf5bf4SSatish Balay 
14404a2ae208SSatish Balay #undef __FUNCT__
14414a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1442ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14435a838052SSatish Balay {
14445a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1445d64ed03dSBarry Smith 
1446d64ed03dSBarry Smith   PetscFunctionBegin;
14475a838052SSatish Balay   *bs = baij->bs;
14483a40ed3dSBarry Smith   PetscFunctionReturn(0);
14495a838052SSatish Balay }
14505a838052SSatish Balay 
14514a2ae208SSatish Balay #undef __FUNCT__
14524a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1453ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
145458667388SSatish Balay {
145558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
145658667388SSatish Balay   int         ierr;
1457d64ed03dSBarry Smith 
1458d64ed03dSBarry Smith   PetscFunctionBegin;
145958667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
146058667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14613a40ed3dSBarry Smith   PetscFunctionReturn(0);
146258667388SSatish Balay }
14630ac07820SSatish Balay 
14644a2ae208SSatish Balay #undef __FUNCT__
14654a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1466ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14670ac07820SSatish Balay {
14684e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14694e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14707d57db60SLois Curfman McInnes   int         ierr;
1471329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14720ac07820SSatish Balay 
1473d64ed03dSBarry Smith   PetscFunctionBegin;
1474f6275e2eSBarry Smith   info->block_size     = (PetscReal)a->bs;
14754e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14760e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1477de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14784e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14790e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1480de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14810ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14824e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14834e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14844e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14854e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14864e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14870ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1488d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
14894e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14904e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14914e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14924e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14934e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14940ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1495d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
14964e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14974e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14984e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14994e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15004e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1501d41123aaSBarry Smith   } else {
150229bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15030ac07820SSatish Balay   }
1504f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1505f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1506f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1507f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
15084e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15094e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15104e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15113a40ed3dSBarry Smith   PetscFunctionReturn(0);
15120ac07820SSatish Balay }
15130ac07820SSatish Balay 
15144a2ae208SSatish Balay #undef __FUNCT__
15154a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1516ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
151758667388SSatish Balay {
151858667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
151998305bb5SBarry Smith   int         ierr;
152058667388SSatish Balay 
1521d64ed03dSBarry Smith   PetscFunctionBegin;
152212c028f9SKris Buschelman   switch (op) {
152312c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
152412c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
152512c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
152612c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
152712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
152812c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
152912c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
153098305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153198305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
153212c028f9SKris Buschelman     break;
153312c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15347c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
153598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
153712c028f9SKris Buschelman     break;
153812c028f9SKris Buschelman   case MAT_ROWS_SORTED:
153912c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
154012c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
1541b0a32e0cSBarry Smith     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
154212c028f9SKris Buschelman     break;
154312c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
15447c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
154598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
154712c028f9SKris Buschelman     break;
154812c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15497c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
155012c028f9SKris Buschelman     break;
155112c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
155229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
155312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15547c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
155512c028f9SKris Buschelman     break;
1556*77e54ba9SKris Buschelman   case MAT_SYMMETRIC:
1557*77e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
1558*77e54ba9SKris Buschelman     break;
155912c028f9SKris Buschelman   default:
156029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1561d64ed03dSBarry Smith   }
15623a40ed3dSBarry Smith   PetscFunctionReturn(0);
156358667388SSatish Balay }
156458667388SSatish Balay 
15654a2ae208SSatish Balay #undef __FUNCT__
15664a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1567ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15680ac07820SSatish Balay {
15690ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15700ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15710ac07820SSatish Balay   Mat         B;
1572273d9f13SBarry Smith   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
15730ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15743eda8832SBarry Smith   MatScalar   *a;
15750ac07820SSatish Balay 
1576d64ed03dSBarry Smith   PetscFunctionBegin;
157729bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1578273d9f13SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15790ac07820SSatish Balay 
15800ac07820SSatish Balay   /* copy over the A part */
15810ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15820ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
158382502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
15840ac07820SSatish Balay 
15850ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15860ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15870ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15880ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15890ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15900ac07820SSatish Balay       for (k=0; k<bs; k++) {
159193fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15920ac07820SSatish Balay         col++; a += bs;
15930ac07820SSatish Balay       }
15940ac07820SSatish Balay     }
15950ac07820SSatish Balay   }
15960ac07820SSatish Balay   /* copy over the B part */
15970ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15980ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15990ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16000ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16010ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16020ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16030ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16040ac07820SSatish Balay       for (k=0; k<bs; k++) {
160593fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16060ac07820SSatish Balay         col++; a += bs;
16070ac07820SSatish Balay       }
16080ac07820SSatish Balay     }
16090ac07820SSatish Balay   }
1610606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16110ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16120ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16130ac07820SSatish Balay 
16147c922b88SBarry Smith   if (matout) {
16150ac07820SSatish Balay     *matout = B;
16160ac07820SSatish Balay   } else {
1617273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16180ac07820SSatish Balay   }
16193a40ed3dSBarry Smith   PetscFunctionReturn(0);
16200ac07820SSatish Balay }
16210e95ebc0SSatish Balay 
16224a2ae208SSatish Balay #undef __FUNCT__
16234a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
162436c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16250e95ebc0SSatish Balay {
162636c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
162736c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16280e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16290e95ebc0SSatish Balay 
1630d64ed03dSBarry Smith   PetscFunctionBegin;
163136c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
163236c4a09eSSatish Balay   if (rr) {
163336c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
163429bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
163536c4a09eSSatish Balay     /* Overlap communication with computation. */
163636c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
163736c4a09eSSatish Balay   }
16380e95ebc0SSatish Balay   if (ll) {
16390e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
164029bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1641a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16420e95ebc0SSatish Balay   }
164336c4a09eSSatish Balay   /* scale  the diagonal block */
164436c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
164536c4a09eSSatish Balay 
164636c4a09eSSatish Balay   if (rr) {
164736c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
164836c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1649a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
165036c4a09eSSatish Balay   }
165136c4a09eSSatish Balay 
16523a40ed3dSBarry Smith   PetscFunctionReturn(0);
16530e95ebc0SSatish Balay }
16540e95ebc0SSatish Balay 
16554a2ae208SSatish Balay #undef __FUNCT__
16564a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1657268466fbSBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
16580ac07820SSatish Balay {
16590ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16600ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1661c1dc657dSBarry Smith   int            *nprocs,j,idx,nsends,row;
16620ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16630ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1664a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16650ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16660ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16670ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16680ac07820SSatish Balay   IS             istmp;
166935d8aa7fSBarry Smith   PetscTruth     found;
16700ac07820SSatish Balay 
1671d64ed03dSBarry Smith   PetscFunctionBegin;
1672f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
16730ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16740ac07820SSatish Balay 
16750ac07820SSatish Balay   /*  first count number of contributors to each processor */
167682502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1677549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1678b0a32e0cSBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
16790ac07820SSatish Balay   for (i=0; i<N; i++) {
16800ac07820SSatish Balay     idx   = rows[i];
168135d8aa7fSBarry Smith     found = PETSC_FALSE;
16820ac07820SSatish Balay     for (j=0; j<size; j++) {
16830ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1684c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
16850ac07820SSatish Balay       }
16860ac07820SSatish Balay     }
168729bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
16880ac07820SSatish Balay   }
1689c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
16900ac07820SSatish Balay 
16910ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1692c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
16930ac07820SSatish Balay 
16940ac07820SSatish Balay   /* post receives:   */
1695b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1696b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
16970ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1698ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16990ac07820SSatish Balay   }
17000ac07820SSatish Balay 
17010ac07820SSatish Balay   /* do sends:
17020ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17030ac07820SSatish Balay      the ith processor
17040ac07820SSatish Balay   */
1705b0a32e0cSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1706b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1707b0a32e0cSBarry Smith   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
17080ac07820SSatish Balay   starts[0]  = 0;
1709c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17100ac07820SSatish Balay   for (i=0; i<N; i++) {
17110ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17120ac07820SSatish Balay   }
17136831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17140ac07820SSatish Balay 
17150ac07820SSatish Balay   starts[0] = 0;
1716c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17170ac07820SSatish Balay   count = 0;
17180ac07820SSatish Balay   for (i=0; i<size; i++) {
1719c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1720c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17210ac07820SSatish Balay     }
17220ac07820SSatish Balay   }
1723606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17240ac07820SSatish Balay 
17250ac07820SSatish Balay   base = owners[rank]*bs;
17260ac07820SSatish Balay 
17270ac07820SSatish Balay   /*  wait on receives */
1728b0a32e0cSBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
17290ac07820SSatish Balay   source = lens + nrecvs;
17300ac07820SSatish Balay   count  = nrecvs; slen = 0;
17310ac07820SSatish Balay   while (count) {
1732ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17330ac07820SSatish Balay     /* unpack receives into our local space */
1734ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17350ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17360ac07820SSatish Balay     lens[imdex]    = n;
17370ac07820SSatish Balay     slen          += n;
17380ac07820SSatish Balay     count--;
17390ac07820SSatish Balay   }
1740606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17410ac07820SSatish Balay 
17420ac07820SSatish Balay   /* move the data into the send scatter */
1743b0a32e0cSBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
17440ac07820SSatish Balay   count = 0;
17450ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17460ac07820SSatish Balay     values = rvalues + i*nmax;
17470ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17480ac07820SSatish Balay       lrows[count++] = values[j] - base;
17490ac07820SSatish Balay     }
17500ac07820SSatish Balay   }
1751606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1752606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1753606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1754606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17550ac07820SSatish Balay 
17560ac07820SSatish Balay   /* actually zap the local rows */
1757029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1758b0a32e0cSBarry Smith   PetscLogObjectParent(A,istmp);
1759a07cd24cSSatish Balay 
176072dacd9aSBarry Smith   /*
176172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
176272dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
176372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
176472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
176572dacd9aSBarry Smith 
176672dacd9aSBarry Smith        Contributed by: Mathew Knepley
176772dacd9aSBarry Smith   */
17689c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17696fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
17709c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17716fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17729c957beeSSatish Balay   } else if (diag) {
17736fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1774fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
177529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1776fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17776525c446SSatish Balay     }
1778a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1779a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17803f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1781a07cd24cSSatish Balay     }
1782a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1783a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17849c957beeSSatish Balay   } else {
17856fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1786a07cd24cSSatish Balay   }
17879c957beeSSatish Balay 
17889c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1789606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1790a07cd24cSSatish Balay 
17910ac07820SSatish Balay   /* wait on sends */
17920ac07820SSatish Balay   if (nsends) {
179382502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1794ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1795606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
17960ac07820SSatish Balay   }
1797606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1798606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
17990ac07820SSatish Balay 
18003a40ed3dSBarry Smith   PetscFunctionReturn(0);
18010ac07820SSatish Balay }
180272dacd9aSBarry Smith 
18034a2ae208SSatish Balay #undef __FUNCT__
18044a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1805ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1806ba4ca20aSSatish Balay {
1807ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
180825fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1809133cdb44SSatish Balay   static int  called = 0;
18103a40ed3dSBarry Smith   int         ierr;
1811ba4ca20aSSatish Balay 
1812d64ed03dSBarry Smith   PetscFunctionBegin;
18133a40ed3dSBarry Smith   if (!a->rank) {
18143a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
181525fdafccSSatish Balay   }
181625fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1817d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1818d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1820ba4ca20aSSatish Balay }
18210ac07820SSatish Balay 
18224a2ae208SSatish Balay #undef __FUNCT__
18234a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1824ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1825bb5a7306SBarry Smith {
1826bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1827bb5a7306SBarry Smith   int         ierr;
1828d64ed03dSBarry Smith 
1829d64ed03dSBarry Smith   PetscFunctionBegin;
1830bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1832bb5a7306SBarry Smith }
1833bb5a7306SBarry Smith 
18342e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18350ac07820SSatish Balay 
18364a2ae208SSatish Balay #undef __FUNCT__
18374a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
18387fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18397fc3c18eSBarry Smith {
18407fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18417fc3c18eSBarry Smith   Mat         a,b,c,d;
18427fc3c18eSBarry Smith   PetscTruth  flg;
18437fc3c18eSBarry Smith   int         ierr;
18447fc3c18eSBarry Smith 
18457fc3c18eSBarry Smith   PetscFunctionBegin;
18467fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18477fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18487fc3c18eSBarry Smith 
18497fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18507fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18517fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18527fc3c18eSBarry Smith   }
18537fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18547fc3c18eSBarry Smith   PetscFunctionReturn(0);
18557fc3c18eSBarry Smith }
18567fc3c18eSBarry Smith 
1857273d9f13SBarry Smith 
18584a2ae208SSatish Balay #undef __FUNCT__
18594a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1860273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A)
1861273d9f13SBarry Smith {
1862273d9f13SBarry Smith   int        ierr;
1863273d9f13SBarry Smith 
1864273d9f13SBarry Smith   PetscFunctionBegin;
1865273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1866273d9f13SBarry Smith   PetscFunctionReturn(0);
1867273d9f13SBarry Smith }
1868273d9f13SBarry Smith 
186979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1870cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1871cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1872cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1873cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1874cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
187597304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
18767c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
18777c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1878cc2dc46cSBarry Smith        0,
1879cc2dc46cSBarry Smith        0,
1880cc2dc46cSBarry Smith        0,
188197304618SKris Buschelman /*10*/ 0,
1882cc2dc46cSBarry Smith        0,
1883cc2dc46cSBarry Smith        0,
1884cc2dc46cSBarry Smith        0,
1885cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
188697304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
18877fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1888cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1889cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1890cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
189197304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
1892cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
1893cc2dc46cSBarry Smith        0,
1894cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
1895cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
189697304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
1897cc2dc46cSBarry Smith        0,
1898cc2dc46cSBarry Smith        0,
1899cc2dc46cSBarry Smith        0,
1900cc2dc46cSBarry Smith        0,
190197304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
1902273d9f13SBarry Smith        0,
1903cc2dc46cSBarry Smith        0,
1904cc2dc46cSBarry Smith        0,
1905cc2dc46cSBarry Smith        0,
190697304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
1907cc2dc46cSBarry Smith        0,
1908cc2dc46cSBarry Smith        0,
1909cc2dc46cSBarry Smith        0,
1910cc2dc46cSBarry Smith        0,
191197304618SKris Buschelman /*40*/ 0,
1912cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
1913cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
1914cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
1915cc2dc46cSBarry Smith        0,
191697304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
1917cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
1918cc2dc46cSBarry Smith        0,
1919cc2dc46cSBarry Smith        0,
1920cc2dc46cSBarry Smith        0,
192197304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ,
1922cc2dc46cSBarry Smith        0,
1923cc2dc46cSBarry Smith        0,
1924cc2dc46cSBarry Smith        0,
1925cc2dc46cSBarry Smith        0,
192697304618SKris Buschelman /*55*/ 0,
1927cc2dc46cSBarry Smith        0,
1928cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
1929cc2dc46cSBarry Smith        0,
1930cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
193197304618SKris Buschelman /*60*/ 0,
1932f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
1933f14a1c24SBarry Smith        MatView_MPIBAIJ,
19348a124369SBarry Smith        MatGetPetscMaps_Petsc,
19357843d17aSBarry Smith        0,
193697304618SKris Buschelman /*65*/ 0,
19377843d17aSBarry Smith        0,
19387843d17aSBarry Smith        0,
19397843d17aSBarry Smith        0,
19407843d17aSBarry Smith        0,
194197304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
19427843d17aSBarry Smith        0,
194397304618SKris Buschelman        0,
194497304618SKris Buschelman        0,
194597304618SKris Buschelman        0,
194697304618SKris Buschelman /*75*/ 0,
194797304618SKris Buschelman        0,
194897304618SKris Buschelman        0,
194997304618SKris Buschelman        0,
195097304618SKris Buschelman        0,
195197304618SKris Buschelman /*80*/ 0,
195297304618SKris Buschelman        0,
195397304618SKris Buschelman        0,
195497304618SKris Buschelman        0,
195597304618SKris Buschelman        0,
195697304618SKris Buschelman /*85*/ MatLoad_MPIBAIJ
195797304618SKris Buschelman };
195879bdfe76SSatish Balay 
19595ef9f2a5SBarry Smith 
1960e18c124aSSatish Balay EXTERN_C_BEGIN
19614a2ae208SSatish Balay #undef __FUNCT__
19624a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
19635ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19645ef9f2a5SBarry Smith {
19655ef9f2a5SBarry Smith   PetscFunctionBegin;
19665ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19675ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19685ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19695ef9f2a5SBarry Smith }
1970e18c124aSSatish Balay EXTERN_C_END
197179bdfe76SSatish Balay 
1972273d9f13SBarry Smith EXTERN_C_BEGIN
19734a2ae208SSatish Balay #undef __FUNCT__
1974a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1975a23d5eceSKris Buschelman int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1976a23d5eceSKris Buschelman {
1977a23d5eceSKris Buschelman   Mat_MPIBAIJ  *b;
1978a23d5eceSKris Buschelman   int          ierr,i;
1979a23d5eceSKris Buschelman 
1980a23d5eceSKris Buschelman   PetscFunctionBegin;
1981a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
1982a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1983a23d5eceSKris Buschelman 
1984a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1985a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1986a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1987a23d5eceSKris Buschelman   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1988a23d5eceSKris Buschelman   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1989a23d5eceSKris Buschelman   if (d_nnz) {
1990a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
1991a23d5eceSKris Buschelman       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1992a23d5eceSKris Buschelman     }
1993a23d5eceSKris Buschelman   }
1994a23d5eceSKris Buschelman   if (o_nnz) {
1995a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
1996a23d5eceSKris Buschelman       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1997a23d5eceSKris Buschelman     }
1998a23d5eceSKris Buschelman   }
1999a23d5eceSKris Buschelman 
2000a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2001a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2002a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2003a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2004a23d5eceSKris Buschelman 
2005a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2006a23d5eceSKris Buschelman   b->bs  = bs;
2007a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2008a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2009a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2010a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2011a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2012a23d5eceSKris Buschelman 
2013a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2014a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2015a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2016a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2017a23d5eceSKris Buschelman   }
2018a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2019a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2020a23d5eceSKris Buschelman 
2021a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2022a23d5eceSKris Buschelman   b->cowners[0] = 0;
2023a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2024a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2025a23d5eceSKris Buschelman   }
2026a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2027a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2028a23d5eceSKris Buschelman 
2029a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2030a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2031a23d5eceSKris Buschelman   }
2032a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2033a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2034a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2035a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2036a23d5eceSKris Buschelman 
2037a23d5eceSKris Buschelman   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2038a23d5eceSKris Buschelman   PetscLogObjectParent(B,b->A);
2039a23d5eceSKris Buschelman   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2040a23d5eceSKris Buschelman   PetscLogObjectParent(B,b->B);
2041a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2042a23d5eceSKris Buschelman 
2043a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2044a23d5eceSKris Buschelman }
2045a23d5eceSKris Buschelman EXTERN_C_END
2046a23d5eceSKris Buschelman 
2047a23d5eceSKris Buschelman EXTERN_C_BEGIN
204892b32695SKris Buschelman EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
20495bf65638SKris Buschelman EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
205092b32695SKris Buschelman EXTERN_C_END
20515bf65638SKris Buschelman 
20520bad9183SKris Buschelman /*MC
2053fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
20540bad9183SKris Buschelman 
20550bad9183SKris Buschelman    Options Database Keys:
20560bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
20570bad9183SKris Buschelman 
20580bad9183SKris Buschelman   Level: beginner
20590bad9183SKris Buschelman 
20600bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
20610bad9183SKris Buschelman M*/
20620bad9183SKris Buschelman 
206392b32695SKris Buschelman EXTERN_C_BEGIN
2064a23d5eceSKris Buschelman #undef __FUNCT__
20654a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2066273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B)
2067273d9f13SBarry Smith {
2068273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
2069cfce73b9SSatish Balay   int          ierr;
2070273d9f13SBarry Smith   PetscTruth   flg;
2071273d9f13SBarry Smith 
2072273d9f13SBarry Smith   PetscFunctionBegin;
2073273d9f13SBarry Smith 
207482502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
207582502324SSatish Balay   B->data = (void*)b;
207682502324SSatish Balay 
2077273d9f13SBarry Smith   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2078273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2079273d9f13SBarry Smith   B->mapping    = 0;
2080273d9f13SBarry Smith   B->factor     = 0;
2081273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2082273d9f13SBarry Smith 
2083273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2084273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2085273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2086273d9f13SBarry Smith 
2087273d9f13SBarry Smith   /* build local table of row and column ownerships */
208882502324SSatish Balay   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2089b0a32e0cSBarry Smith   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2090273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2091273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2092273d9f13SBarry Smith 
2093273d9f13SBarry Smith   /* build cache for off array entries formed */
2094273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2095273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2096273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2097273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2098273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2099273d9f13SBarry Smith 
2100cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2101273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
210264a35ccbSBarry Smith   b->setvalueslen     = 0;
2103273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2104273d9f13SBarry Smith #endif
2105273d9f13SBarry Smith 
2106273d9f13SBarry Smith   /* stuff used in block assembly */
2107273d9f13SBarry Smith   b->barray       = 0;
2108273d9f13SBarry Smith 
2109273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2110273d9f13SBarry Smith   b->lvec         = 0;
2111273d9f13SBarry Smith   b->Mvctx        = 0;
2112273d9f13SBarry Smith 
2113273d9f13SBarry Smith   /* stuff for MatGetRow() */
2114273d9f13SBarry Smith   b->rowindices   = 0;
2115273d9f13SBarry Smith   b->rowvalues    = 0;
2116273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2117273d9f13SBarry Smith 
2118273d9f13SBarry Smith   /* hash table stuff */
2119273d9f13SBarry Smith   b->ht           = 0;
2120273d9f13SBarry Smith   b->hd           = 0;
2121273d9f13SBarry Smith   b->ht_size      = 0;
2122273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2123273d9f13SBarry Smith   b->ht_fact      = 0;
2124273d9f13SBarry Smith   b->ht_total_ct  = 0;
2125273d9f13SBarry Smith   b->ht_insert_ct = 0;
2126273d9f13SBarry Smith 
2127b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2128273d9f13SBarry Smith   if (flg) {
2129f6275e2eSBarry Smith     PetscReal fact = 1.39;
2130273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
213187828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2132273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2133273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2134b0a32e0cSBarry Smith     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2135273d9f13SBarry Smith   }
2136273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2137273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2138273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2139273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2140273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2141273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2142273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2143273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2144273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2145a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2146a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2147a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
214892b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
214992b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
215092b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
21515bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
21525bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
21535bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2154273d9f13SBarry Smith   PetscFunctionReturn(0);
2155273d9f13SBarry Smith }
2156273d9f13SBarry Smith EXTERN_C_END
2157273d9f13SBarry Smith 
2158209238afSKris Buschelman /*MC
2159002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2160209238afSKris Buschelman 
2161209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2162209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2163209238afSKris Buschelman 
2164209238afSKris Buschelman    Options Database Keys:
2165209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2166209238afSKris Buschelman 
2167209238afSKris Buschelman   Level: beginner
2168209238afSKris Buschelman 
2169209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2170209238afSKris Buschelman M*/
2171209238afSKris Buschelman 
2172209238afSKris Buschelman EXTERN_C_BEGIN
2173209238afSKris Buschelman #undef __FUNCT__
2174209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2175209238afSKris Buschelman int MatCreate_BAIJ(Mat A) {
2176209238afSKris Buschelman   int ierr,size;
2177209238afSKris Buschelman 
2178209238afSKris Buschelman   PetscFunctionBegin;
2179209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2180209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2181209238afSKris Buschelman   if (size == 1) {
2182209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2183209238afSKris Buschelman   } else {
2184209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2185209238afSKris Buschelman   }
2186209238afSKris Buschelman   PetscFunctionReturn(0);
2187209238afSKris Buschelman }
2188209238afSKris Buschelman EXTERN_C_END
2189209238afSKris Buschelman 
21904a2ae208SSatish Balay #undef __FUNCT__
21914a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2192273d9f13SBarry Smith /*@C
2193273d9f13SBarry Smith    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2194273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2195273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2196273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2197273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2198273d9f13SBarry Smith 
2199273d9f13SBarry Smith    Collective on Mat
2200273d9f13SBarry Smith 
2201273d9f13SBarry Smith    Input Parameters:
2202273d9f13SBarry Smith +  A - the matrix
2203273d9f13SBarry Smith .  bs   - size of blockk
2204273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2205273d9f13SBarry Smith            submatrix  (same for all local rows)
2206273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2207273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2208273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2209273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2210273d9f13SBarry Smith            submatrix (same for all local rows).
2211273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2212273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2213273d9f13SBarry Smith            each block row) or PETSC_NULL.
2214273d9f13SBarry Smith 
2215273d9f13SBarry Smith    Output Parameter:
2216273d9f13SBarry Smith 
2217273d9f13SBarry Smith 
2218273d9f13SBarry Smith    Options Database Keys:
2219273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2220273d9f13SBarry Smith                      block calculations (much slower)
2221273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2222273d9f13SBarry Smith 
2223273d9f13SBarry Smith    Notes:
2224273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2225273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2226273d9f13SBarry Smith 
2227273d9f13SBarry Smith    Storage Information:
2228273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2229273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2230273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2231273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2232273d9f13SBarry Smith 
2233273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2234273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2235273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2236273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2237273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2238273d9f13SBarry Smith 
2239273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2240273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2241273d9f13SBarry Smith 
2242273d9f13SBarry Smith .vb
2243273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2244273d9f13SBarry Smith           -------------------
2245273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2246273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2247273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2248273d9f13SBarry Smith           -------------------
2249273d9f13SBarry Smith .ve
2250273d9f13SBarry Smith 
2251273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2252273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2253273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2254273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2255273d9f13SBarry Smith 
2256273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2257273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2258273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2259273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2260273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2261273d9f13SBarry Smith    matrices.
2262273d9f13SBarry Smith 
2263273d9f13SBarry Smith    Level: intermediate
2264273d9f13SBarry Smith 
2265273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2266273d9f13SBarry Smith 
2267273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2268273d9f13SBarry Smith @*/
2269ca01db9bSBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2270273d9f13SBarry Smith {
2271ca01db9bSBarry Smith   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2272273d9f13SBarry Smith 
2273273d9f13SBarry Smith   PetscFunctionBegin;
2274a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2275a23d5eceSKris Buschelman   if (f) {
2276a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2277273d9f13SBarry Smith   }
2278273d9f13SBarry Smith   PetscFunctionReturn(0);
2279273d9f13SBarry Smith }
2280273d9f13SBarry Smith 
22814a2ae208SSatish Balay #undef __FUNCT__
22824a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
228379bdfe76SSatish Balay /*@C
228479bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
228579bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
228679bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
228779bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
228879bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
228979bdfe76SSatish Balay 
2290db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2291db81eaa0SLois Curfman McInnes 
229279bdfe76SSatish Balay    Input Parameters:
2293db81eaa0SLois Curfman McInnes +  comm - MPI communicator
229479bdfe76SSatish Balay .  bs   - size of blockk
229579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
229692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
229792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
229892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
229992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
230092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2301be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2302be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
230347a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
230479bdfe76SSatish Balay            submatrix  (same for all local rows)
230547a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
230692e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2307db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
230847a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
230979bdfe76SSatish Balay            submatrix (same for all local rows).
231047a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
231192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
231292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
231379bdfe76SSatish Balay 
231479bdfe76SSatish Balay    Output Parameter:
231579bdfe76SSatish Balay .  A - the matrix
231679bdfe76SSatish Balay 
2317db81eaa0SLois Curfman McInnes    Options Database Keys:
2318db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2319db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2320db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
23213ffaccefSLois Curfman McInnes 
2322b259b22eSLois Curfman McInnes    Notes:
232347a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
232447a75d0bSBarry Smith 
232579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
232679bdfe76SSatish Balay    (possibly both).
232779bdfe76SSatish Balay 
2328be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2329be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2330be79a94dSBarry Smith 
233179bdfe76SSatish Balay    Storage Information:
233279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
233379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
233479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
233579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
233679bdfe76SSatish Balay 
233779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
233879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
233979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
234079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
234179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
234279bdfe76SSatish Balay 
234379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
234479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
234579bdfe76SSatish Balay 
2346db81eaa0SLois Curfman McInnes .vb
2347db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2348db81eaa0SLois Curfman McInnes           -------------------
2349db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2350db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2351db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2352db81eaa0SLois Curfman McInnes           -------------------
2353db81eaa0SLois Curfman McInnes .ve
235479bdfe76SSatish Balay 
235579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
235679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
235779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
235857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
235979bdfe76SSatish Balay 
2360d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2361d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
236279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
236392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
236492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
23656da5968aSLois Curfman McInnes    matrices.
236679bdfe76SSatish Balay 
2367027ccd11SLois Curfman McInnes    Level: intermediate
2368027ccd11SLois Curfman McInnes 
236992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
237079bdfe76SSatish Balay 
23713eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
237279bdfe76SSatish Balay @*/
2373ca01db9bSBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
237479bdfe76SSatish Balay {
2375273d9f13SBarry Smith   int ierr,size;
237679bdfe76SSatish Balay 
2377d64ed03dSBarry Smith   PetscFunctionBegin;
2378273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2379d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2380273d9f13SBarry Smith   if (size > 1) {
2381273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2382273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2383273d9f13SBarry Smith   } else {
2384273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2385273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
23863914022bSBarry Smith   }
23873a40ed3dSBarry Smith   PetscFunctionReturn(0);
238879bdfe76SSatish Balay }
2389026e39d0SSatish Balay 
23904a2ae208SSatish Balay #undef __FUNCT__
23914a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
23922e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
23930ac07820SSatish Balay {
23940ac07820SSatish Balay   Mat         mat;
23950ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2396f1af5d2fSBarry Smith   int         ierr,len=0;
23970ac07820SSatish Balay 
2398d64ed03dSBarry Smith   PetscFunctionBegin;
23990ac07820SSatish Balay   *newmat       = 0;
2400273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2401273d9f13SBarry Smith   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2402273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24030ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2404273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
24050ac07820SSatish Balay   a->bs  = oldmat->bs;
24060ac07820SSatish Balay   a->bs2 = oldmat->bs2;
24070ac07820SSatish Balay   a->mbs = oldmat->mbs;
24080ac07820SSatish Balay   a->nbs = oldmat->nbs;
24090ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
24100ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
24110ac07820SSatish Balay 
24120ac07820SSatish Balay   a->rstart       = oldmat->rstart;
24130ac07820SSatish Balay   a->rend         = oldmat->rend;
24140ac07820SSatish Balay   a->cstart       = oldmat->cstart;
24150ac07820SSatish Balay   a->cend         = oldmat->cend;
24160ac07820SSatish Balay   a->size         = oldmat->size;
24170ac07820SSatish Balay   a->rank         = oldmat->rank;
2418aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2419aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2420aef5e8e0SSatish Balay   a->rowindices   = 0;
24210ac07820SSatish Balay   a->rowvalues    = 0;
24220ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
242330793edcSSatish Balay   a->barray       = 0;
24243123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
24253123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
24263123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
24273123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
24280ac07820SSatish Balay 
2429133cdb44SSatish Balay   /* hash table stuff */
2430133cdb44SSatish Balay   a->ht           = 0;
2431133cdb44SSatish Balay   a->hd           = 0;
2432133cdb44SSatish Balay   a->ht_size      = 0;
2433133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
243425fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2435133cdb44SSatish Balay   a->ht_total_ct  = 0;
2436133cdb44SSatish Balay   a->ht_insert_ct = 0;
2437133cdb44SSatish Balay 
2438549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
24398798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
24408798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
24410ac07820SSatish Balay   if (oldmat->colmap) {
2442aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
24430f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
244448e59246SSatish Balay #else
244582502324SSatish Balay   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2446b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2447549d3d68SSatish Balay   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
244848e59246SSatish Balay #endif
24490ac07820SSatish Balay   } else a->colmap = 0;
24500ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
245182502324SSatish Balay     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2452b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,len*sizeof(int));
2453549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
24540ac07820SSatish Balay   } else a->garray = 0;
24550ac07820SSatish Balay 
24560ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2457b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->lvec);
24580ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2459e18c124aSSatish Balay 
2460b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->Mvctx);
24612e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2462b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->A);
24632e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2464b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->B);
2465b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
24660ac07820SSatish Balay   *newmat = mat;
24673a40ed3dSBarry Smith   PetscFunctionReturn(0);
24680ac07820SSatish Balay }
246957b952d6SSatish Balay 
2470e090d566SSatish Balay #include "petscsys.h"
247157b952d6SSatish Balay 
24724a2ae208SSatish Balay #undef __FUNCT__
24734a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2474b0a32e0cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
247557b952d6SSatish Balay {
247657b952d6SSatish Balay   Mat          A;
247757b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
247887828ca2SBarry Smith   PetscScalar  *vals,*buf;
247957b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
248057b952d6SSatish Balay   MPI_Status   status;
2481cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
248257b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2483f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
248457b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
248557b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
248657b952d6SSatish Balay 
2487d64ed03dSBarry Smith   PetscFunctionBegin;
2488b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
248957b952d6SSatish Balay 
2490d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2491d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
249257b952d6SSatish Balay   if (!rank) {
2493b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2494e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2495552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2496d64ed03dSBarry Smith     if (header[3] < 0) {
249729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2498d64ed03dSBarry Smith     }
24996c5fab8fSBarry Smith   }
2500d64ed03dSBarry Smith 
2501ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
250257b952d6SSatish Balay   M = header[1]; N = header[2];
250357b952d6SSatish Balay 
250429bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
250557b952d6SSatish Balay 
250657b952d6SSatish Balay   /*
250757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
250857b952d6SSatish Balay      divisible by the blocksize
250957b952d6SSatish Balay   */
251057b952d6SSatish Balay   Mbs        = M/bs;
251157b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
251257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
251357b952d6SSatish Balay   else                  Mbs++;
251457b952d6SSatish Balay   if (extra_rows &&!rank) {
2515b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
251657b952d6SSatish Balay   }
2517537820f0SBarry Smith 
251857b952d6SSatish Balay   /* determine ownership of all rows */
251957b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
252057b952d6SSatish Balay   m          = mbs*bs;
2521b0a32e0cSBarry Smith   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2522cee3aa6bSSatish Balay   browners   = rowners + size + 1;
2523ca161407SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
252457b952d6SSatish Balay   rowners[0] = 0;
2525cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2526cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
252757b952d6SSatish Balay   rstart = rowners[rank];
252857b952d6SSatish Balay   rend   = rowners[rank+1];
252957b952d6SSatish Balay 
253057b952d6SSatish Balay   /* distribute row lengths to all processors */
253182502324SSatish Balay   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
253257b952d6SSatish Balay   if (!rank) {
2533b0a32e0cSBarry Smith     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2534e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
253557b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
253682502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2537cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2538ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2539606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2540d64ed03dSBarry Smith   } else {
2541ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
254257b952d6SSatish Balay   }
254357b952d6SSatish Balay 
254457b952d6SSatish Balay   if (!rank) {
254557b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
254682502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2547549d3d68SSatish Balay     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
254857b952d6SSatish Balay     for (i=0; i<size; i++) {
254957b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
255057b952d6SSatish Balay         procsnz[i] += rowlengths[j];
255157b952d6SSatish Balay       }
255257b952d6SSatish Balay     }
2553606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
255457b952d6SSatish Balay 
255557b952d6SSatish Balay     /* determine max buffer needed and allocate it */
255657b952d6SSatish Balay     maxnz = 0;
255757b952d6SSatish Balay     for (i=0; i<size; i++) {
255857b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
255957b952d6SSatish Balay     }
256082502324SSatish Balay     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
256157b952d6SSatish Balay 
256257b952d6SSatish Balay     /* read in my part of the matrix column indices  */
256357b952d6SSatish Balay     nz     = procsnz[0];
256482502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
256557b952d6SSatish Balay     mycols = ibuf;
2566cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2567e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2568cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2569cee3aa6bSSatish Balay 
257057b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
257157b952d6SSatish Balay     for (i=1; i<size-1; i++) {
257257b952d6SSatish Balay       nz   = procsnz[i];
2573e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2574ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
257557b952d6SSatish Balay     }
257657b952d6SSatish Balay     /* read in the stuff for the last proc */
257757b952d6SSatish Balay     if (size != 1) {
257857b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2579e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
258057b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2581ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
258257b952d6SSatish Balay     }
2583606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2584d64ed03dSBarry Smith   } else {
258557b952d6SSatish Balay     /* determine buffer space needed for message */
258657b952d6SSatish Balay     nz = 0;
258757b952d6SSatish Balay     for (i=0; i<m; i++) {
258857b952d6SSatish Balay       nz += locrowlens[i];
258957b952d6SSatish Balay     }
259082502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
259157b952d6SSatish Balay     mycols = ibuf;
259257b952d6SSatish Balay     /* receive message of column indices*/
2593ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2594ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
259529bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
259657b952d6SSatish Balay   }
259757b952d6SSatish Balay 
259857b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
259982502324SSatish Balay   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2600cee3aa6bSSatish Balay   odlens   = dlens + (rend-rstart);
260182502324SSatish Balay   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2602549d3d68SSatish Balay   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
260357b952d6SSatish Balay   masked1  = mask    + Mbs;
260457b952d6SSatish Balay   masked2  = masked1 + Mbs;
260557b952d6SSatish Balay   rowcount = 0; nzcount = 0;
260657b952d6SSatish Balay   for (i=0; i<mbs; i++) {
260757b952d6SSatish Balay     dcount  = 0;
260857b952d6SSatish Balay     odcount = 0;
260957b952d6SSatish Balay     for (j=0; j<bs; j++) {
261057b952d6SSatish Balay       kmax = locrowlens[rowcount];
261157b952d6SSatish Balay       for (k=0; k<kmax; k++) {
261257b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
261357b952d6SSatish Balay         if (!mask[tmp]) {
261457b952d6SSatish Balay           mask[tmp] = 1;
261557b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
261657b952d6SSatish Balay           else masked1[dcount++] = tmp;
261757b952d6SSatish Balay         }
261857b952d6SSatish Balay       }
261957b952d6SSatish Balay       rowcount++;
262057b952d6SSatish Balay     }
2621cee3aa6bSSatish Balay 
262257b952d6SSatish Balay     dlens[i]  = dcount;
262357b952d6SSatish Balay     odlens[i] = odcount;
2624cee3aa6bSSatish Balay 
262557b952d6SSatish Balay     /* zero out the mask elements we set */
262657b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
262757b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
262857b952d6SSatish Balay   }
2629cee3aa6bSSatish Balay 
263057b952d6SSatish Balay   /* create our matrix */
263178ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
263278ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
263378ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
263478ae41b4SKris Buschelman 
263578ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
26366d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
263757b952d6SSatish Balay 
263857b952d6SSatish Balay   if (!rank) {
263987828ca2SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
264057b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
264157b952d6SSatish Balay     nz = procsnz[0];
264257b952d6SSatish Balay     vals = buf;
2643cee3aa6bSSatish Balay     mycols = ibuf;
2644cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2645e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2646cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2647537820f0SBarry Smith 
264857b952d6SSatish Balay     /* insert into matrix */
264957b952d6SSatish Balay     jj      = rstart*bs;
265057b952d6SSatish Balay     for (i=0; i<m; i++) {
2651b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
265257b952d6SSatish Balay       mycols += locrowlens[i];
265357b952d6SSatish Balay       vals   += locrowlens[i];
265457b952d6SSatish Balay       jj++;
265557b952d6SSatish Balay     }
265657b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
265757b952d6SSatish Balay     for (i=1; i<size-1; i++) {
265857b952d6SSatish Balay       nz   = procsnz[i];
265957b952d6SSatish Balay       vals = buf;
2660e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2661ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
266257b952d6SSatish Balay     }
266357b952d6SSatish Balay     /* the last proc */
266457b952d6SSatish Balay     if (size != 1){
266557b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2666cee3aa6bSSatish Balay       vals = buf;
2667e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
266857b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2669ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
267057b952d6SSatish Balay     }
2671606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2672d64ed03dSBarry Smith   } else {
267357b952d6SSatish Balay     /* receive numeric values */
267487828ca2SBarry Smith     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
267557b952d6SSatish Balay 
267657b952d6SSatish Balay     /* receive message of values*/
267757b952d6SSatish Balay     vals   = buf;
2678cee3aa6bSSatish Balay     mycols = ibuf;
2679ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2680ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
268129bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
268257b952d6SSatish Balay 
268357b952d6SSatish Balay     /* insert into matrix */
268457b952d6SSatish Balay     jj      = rstart*bs;
2685cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2686b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
268757b952d6SSatish Balay       mycols += locrowlens[i];
268857b952d6SSatish Balay       vals   += locrowlens[i];
268957b952d6SSatish Balay       jj++;
269057b952d6SSatish Balay     }
269157b952d6SSatish Balay   }
2692606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2693606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2694606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2695606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2696606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2697606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
26986d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26996d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270078ae41b4SKris Buschelman 
270178ae41b4SKris Buschelman   *newmat = A;
27023a40ed3dSBarry Smith   PetscFunctionReturn(0);
270357b952d6SSatish Balay }
270457b952d6SSatish Balay 
27054a2ae208SSatish Balay #undef __FUNCT__
27064a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2707133cdb44SSatish Balay /*@
2708133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2709133cdb44SSatish Balay 
2710133cdb44SSatish Balay    Input Parameters:
2711133cdb44SSatish Balay .  mat  - the matrix
2712133cdb44SSatish Balay .  fact - factor
2713133cdb44SSatish Balay 
2714fee21e36SBarry Smith    Collective on Mat
2715fee21e36SBarry Smith 
27168c890885SBarry Smith    Level: advanced
27178c890885SBarry Smith 
2718133cdb44SSatish Balay   Notes:
2719133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2720133cdb44SSatish Balay 
2721133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2722133cdb44SSatish Balay 
2723133cdb44SSatish Balay .seealso: MatSetOption()
2724133cdb44SSatish Balay @*/
2725329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2726133cdb44SSatish Balay {
27275bf65638SKris Buschelman   int ierr,(*f)(Mat,PetscReal);
27285bf65638SKris Buschelman 
27295bf65638SKris Buschelman   PetscFunctionBegin;
27305bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
27315bf65638SKris Buschelman   if (f) {
27325bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
27335bf65638SKris Buschelman   }
27345bf65638SKris Buschelman   PetscFunctionReturn(0);
27355bf65638SKris Buschelman }
27365bf65638SKris Buschelman 
27375bf65638SKris Buschelman #undef __FUNCT__
27385bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
27395bf65638SKris Buschelman int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
27405bf65638SKris Buschelman {
274125fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2742133cdb44SSatish Balay 
2743133cdb44SSatish Balay   PetscFunctionBegin;
2744133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2745133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2746133cdb44SSatish Balay   baij->ht_fact = fact;
2747133cdb44SSatish Balay   PetscFunctionReturn(0);
2748133cdb44SSatish Balay }
2749f2a5309cSSatish Balay 
27504a2ae208SSatish Balay #undef __FUNCT__
27514a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2752268466fbSBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2753f2a5309cSSatish Balay {
2754f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2755f2a5309cSSatish Balay   PetscFunctionBegin;
2756f2a5309cSSatish Balay   *Ad     = a->A;
2757f2a5309cSSatish Balay   *Ao     = a->B;
2758195d93cdSBarry Smith   *colmap = a->garray;
2759f2a5309cSSatish Balay   PetscFunctionReturn(0);
2760f2a5309cSSatish Balay }
2761