xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2188ac68de74e38cb94a171751124ccd01af73a8)
1bba1ac68SSatish Balay /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
479bdfe76SSatish Balay 
5ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
6ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat);
7268466fbSBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
8268466fbSBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
9f15d580aSBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
10f15d580aSBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
11f15d580aSBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
12268466fbSBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13268466fbSBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat);
15268466fbSBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1693fea6afSBarry Smith 
1793fea6afSBarry Smith /*  UGLY, ugly, ugly
1887828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1993fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2093fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2193fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2293fea6afSBarry Smith    into the single precision data structures.
2393fea6afSBarry Smith */
2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2534232ad1SSatish Balay EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2634232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2734232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2834232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2934232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
3093fea6afSBarry Smith #else
316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3693fea6afSBarry Smith #endif
373b2fbd54SBarry Smith 
384a2ae208SSatish Balay #undef __FUNCT__
394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
407843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
417843d17aSBarry Smith {
427843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
437843d17aSBarry Smith   int          ierr,i;
4487828ca2SBarry Smith   PetscScalar  *va,*vb;
457843d17aSBarry Smith   Vec          vtmp;
467843d17aSBarry Smith 
477843d17aSBarry Smith   PetscFunctionBegin;
487843d17aSBarry Smith 
497843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
501ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
517843d17aSBarry Smith 
52ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
537843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
541ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
557843d17aSBarry Smith 
56273d9f13SBarry Smith   for (i=0; i<A->m; i++){
57273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
587843d17aSBarry Smith   }
597843d17aSBarry Smith 
601ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
611ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
637843d17aSBarry Smith 
647843d17aSBarry Smith   PetscFunctionReturn(0);
657843d17aSBarry Smith }
667843d17aSBarry Smith 
677fc3c18eSBarry Smith EXTERN_C_BEGIN
684a2ae208SSatish Balay #undef __FUNCT__
694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
707fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
717fc3c18eSBarry Smith {
727fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
737fc3c18eSBarry Smith   int         ierr;
747fc3c18eSBarry Smith 
757fc3c18eSBarry Smith   PetscFunctionBegin;
767fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
787fc3c18eSBarry Smith   PetscFunctionReturn(0);
797fc3c18eSBarry Smith }
807fc3c18eSBarry Smith EXTERN_C_END
817fc3c18eSBarry Smith 
827fc3c18eSBarry Smith EXTERN_C_BEGIN
834a2ae208SSatish Balay #undef __FUNCT__
844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
857fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
867fc3c18eSBarry Smith {
877fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
887fc3c18eSBarry Smith   int         ierr;
897fc3c18eSBarry Smith 
907fc3c18eSBarry Smith   PetscFunctionBegin;
917fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
937fc3c18eSBarry Smith   PetscFunctionReturn(0);
947fc3c18eSBarry Smith }
957fc3c18eSBarry Smith EXTERN_C_END
967fc3c18eSBarry Smith 
97537820f0SBarry Smith /*
98537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
9957b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10057b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10157b952d6SSatish Balay    length of colmap equals the global matrix length.
10257b952d6SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105653e4784SBarry Smith int CreateColmap_MPIBAIJ_Private(Mat mat)
10657b952d6SSatish Balay {
10757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10857b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
109dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
11057b952d6SSatish Balay 
111d64ed03dSBarry Smith   PetscFunctionBegin;
112aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
113f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11448e59246SSatish Balay   for (i=0; i<nbs; i++){
1150f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11648e59246SSatish Balay   }
11748e59246SSatish Balay #else
11882502324SSatish Balay   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
119b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
120549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
121928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12248e59246SSatish Balay #endif
1233a40ed3dSBarry Smith   PetscFunctionReturn(0);
12457b952d6SSatish Balay }
12557b952d6SSatish Balay 
12680c1aa95SSatish Balay #define CHUNKSIZE  10
12780c1aa95SSatish Balay 
128f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
12980c1aa95SSatish Balay { \
13080c1aa95SSatish Balay  \
13180c1aa95SSatish Balay     brow = row/bs;  \
13280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
133ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13480c1aa95SSatish Balay       bcol = col/bs; \
13580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
136ab26458aSBarry Smith       low = 0; high = nrow; \
137ab26458aSBarry Smith       while (high-low > 3) { \
138ab26458aSBarry Smith         t = (low+high)/2; \
139ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
140ab26458aSBarry Smith         else              low  = t; \
141ab26458aSBarry Smith       } \
142ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
147eada6651SSatish Balay           else                    *bap  = value;  \
148ac7a638eSSatish Balay           goto a_noinsert; \
14980c1aa95SSatish Balay         } \
15080c1aa95SSatish Balay       } \
15189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
152a45adfd6SMatthew Knepley       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
15380c1aa95SSatish Balay       if (nrow >= rmax) { \
15480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15580c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1563eda8832SBarry Smith         MatScalar *new_a; \
15780c1aa95SSatish Balay  \
158a45adfd6SMatthew Knepley         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
15989280ab3SLois Curfman McInnes  \
16080c1aa95SSatish Balay         /* malloc new storage space */ \
1613eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
16282502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
16380c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16580c1aa95SSatish Balay  \
16680c1aa95SSatish Balay         /* copy over old data into new slots */ \
16780c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16880c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
169549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
17080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1713eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1723eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17387828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
174549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1753eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17680c1aa95SSatish Balay         /* free up old matrix storage */ \
177606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
178606d414cSSatish Balay         if (!a->singlemalloc) { \
179606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
180606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
181606d414cSSatish Balay         } \
18280c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1837c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18480c1aa95SSatish Balay  \
18580c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
186ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
187b0a32e0cSBarry Smith         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18880c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
18980c1aa95SSatish Balay         a->reallocs++; \
19080c1aa95SSatish Balay         a->nz++; \
19180c1aa95SSatish Balay       } \
19280c1aa95SSatish Balay       N = nrow++ - 1;  \
19380c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19480c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19580c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1963eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19780c1aa95SSatish Balay       } \
1983eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
19980c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20080c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
201ac7a638eSSatish Balay       a_noinsert:; \
20280c1aa95SSatish Balay     ailen[brow] = nrow; \
20380c1aa95SSatish Balay }
20457b952d6SSatish Balay 
205ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
206ac7a638eSSatish Balay { \
207ac7a638eSSatish Balay     brow = row/bs;  \
208ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
209ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
210ac7a638eSSatish Balay       bcol = col/bs; \
211ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
212ac7a638eSSatish Balay       low = 0; high = nrow; \
213ac7a638eSSatish Balay       while (high-low > 3) { \
214ac7a638eSSatish Balay         t = (low+high)/2; \
215ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
216ac7a638eSSatish Balay         else              low  = t; \
217ac7a638eSSatish Balay       } \
218ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
219ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
220ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
221ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
222ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
223ac7a638eSSatish Balay           else                    *bap  = value;  \
224ac7a638eSSatish Balay           goto b_noinsert; \
225ac7a638eSSatish Balay         } \
226ac7a638eSSatish Balay       } \
22789280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
228a45adfd6SMatthew Knepley       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
229ac7a638eSSatish Balay       if (nrow >= rmax) { \
230ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23174c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2323eda8832SBarry Smith         MatScalar *new_a; \
233ac7a638eSSatish Balay  \
234a45adfd6SMatthew Knepley         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
23589280ab3SLois Curfman McInnes  \
236ac7a638eSSatish Balay         /* malloc new storage space */ \
2373eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
23882502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
239ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
240ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
241ac7a638eSSatish Balay  \
242ac7a638eSSatish Balay         /* copy over old data into new slots */ \
243ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24474c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
245549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
246ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2473eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2483eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
250549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2513eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
252ac7a638eSSatish Balay         /* free up old matrix storage */ \
253606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
254606d414cSSatish Balay         if (!b->singlemalloc) { \
255606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
256606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
257606d414cSSatish Balay         } \
25874c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2597c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
260ac7a638eSSatish Balay  \
261ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
262ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
263b0a32e0cSBarry Smith         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26474c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26574c639caSSatish Balay         b->reallocs++; \
26674c639caSSatish Balay         b->nz++; \
267ac7a638eSSatish Balay       } \
268ac7a638eSSatish Balay       N = nrow++ - 1;  \
269ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
270ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
271ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2723eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
273ac7a638eSSatish Balay       } \
2743eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
275ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
276ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
277ac7a638eSSatish Balay       b_noinsert:; \
278ac7a638eSSatish Balay     bilen[brow] = nrow; \
279ac7a638eSSatish Balay }
280ac7a638eSSatish Balay 
28193fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2824a2ae208SSatish Balay #undef __FUNCT__
2834a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
284f15d580aSBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
28557b952d6SSatish Balay {
2866fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
28793fea6afSBarry Smith   int         ierr,i,N = m*n;
2886fa18ffdSBarry Smith   MatScalar   *vsingle;
28993fea6afSBarry Smith 
29093fea6afSBarry Smith   PetscFunctionBegin;
2916fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2926fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29382502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2946fa18ffdSBarry Smith     b->setvalueslen  = N;
2956fa18ffdSBarry Smith   }
2966fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2976fa18ffdSBarry Smith 
29893fea6afSBarry Smith   for (i=0; i<N; i++) {
29993fea6afSBarry Smith     vsingle[i] = v[i];
30093fea6afSBarry Smith   }
30193fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30293fea6afSBarry Smith   PetscFunctionReturn(0);
30393fea6afSBarry Smith }
30493fea6afSBarry Smith 
3054a2ae208SSatish Balay #undef __FUNCT__
3064a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
307f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
30893fea6afSBarry Smith {
3096fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3106fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3116fa18ffdSBarry Smith   MatScalar   *vsingle;
31293fea6afSBarry Smith 
31393fea6afSBarry Smith   PetscFunctionBegin;
3146fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3156fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
31682502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3176fa18ffdSBarry Smith     b->setvalueslen  = N;
3186fa18ffdSBarry Smith   }
3196fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32093fea6afSBarry Smith   for (i=0; i<N; i++) {
32193fea6afSBarry Smith     vsingle[i] = v[i];
32293fea6afSBarry Smith   }
32393fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32493fea6afSBarry Smith   PetscFunctionReturn(0);
32593fea6afSBarry Smith }
3266fa18ffdSBarry Smith 
3274a2ae208SSatish Balay #undef __FUNCT__
3284a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
329f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3306fa18ffdSBarry Smith {
3316fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3326fa18ffdSBarry Smith   int         ierr,i,N = m*n;
3336fa18ffdSBarry Smith   MatScalar   *vsingle;
3346fa18ffdSBarry Smith 
3356fa18ffdSBarry Smith   PetscFunctionBegin;
3366fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3376fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
33882502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3396fa18ffdSBarry Smith     b->setvalueslen  = N;
3406fa18ffdSBarry Smith   }
3416fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3426fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3436fa18ffdSBarry Smith     vsingle[i] = v[i];
3446fa18ffdSBarry Smith   }
3456fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3466fa18ffdSBarry Smith   PetscFunctionReturn(0);
3476fa18ffdSBarry Smith }
3486fa18ffdSBarry Smith 
3494a2ae208SSatish Balay #undef __FUNCT__
3504a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
351f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3526fa18ffdSBarry Smith {
3536fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3546fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3556fa18ffdSBarry Smith   MatScalar   *vsingle;
3566fa18ffdSBarry Smith 
3576fa18ffdSBarry Smith   PetscFunctionBegin;
3586fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3596fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
36082502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3616fa18ffdSBarry Smith     b->setvalueslen  = N;
3626fa18ffdSBarry Smith   }
3636fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3646fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3656fa18ffdSBarry Smith     vsingle[i] = v[i];
3666fa18ffdSBarry Smith   }
3676fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3686fa18ffdSBarry Smith   PetscFunctionReturn(0);
3696fa18ffdSBarry Smith }
37093fea6afSBarry Smith #endif
37193fea6afSBarry Smith 
3724a2ae208SSatish Balay #undef __FUNCT__
373e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
374f15d580aSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
37593fea6afSBarry Smith {
37657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
37793fea6afSBarry Smith   MatScalar   value;
378273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
3794fa0d573SSatish Balay   int         ierr,i,j,row,col;
380273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
3814fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3824fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38357b952d6SSatish Balay 
384eada6651SSatish Balay   /* Some Variables required in the macro */
38580c1aa95SSatish Balay   Mat         A = baij->A;
38680c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
387ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3883eda8832SBarry Smith   MatScalar   *aa=a->a;
389ac7a638eSSatish Balay 
390ac7a638eSSatish Balay   Mat         B = baij->B;
391ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
392ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3933eda8832SBarry Smith   MatScalar   *ba=b->a;
394ac7a638eSSatish Balay 
395ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
396ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3973eda8832SBarry Smith   MatScalar   *ap,*bap;
39880c1aa95SSatish Balay 
399d64ed03dSBarry Smith   PetscFunctionBegin;
40057b952d6SSatish Balay   for (i=0; i<m; i++) {
4015ef9f2a5SBarry Smith     if (im[i] < 0) continue;
402aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
403590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
404639f9d9dSBarry Smith #endif
40557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
40657b952d6SSatish Balay       row = im[i] - rstart_orig;
40757b952d6SSatish Balay       for (j=0; j<n; j++) {
40857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
40957b952d6SSatish Balay           col = in[j] - cstart_orig;
41057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41280c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41373959e64SBarry Smith         } else if (in[j] < 0) continue;
414aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
415590ac198SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
416639f9d9dSBarry Smith #endif
41757b952d6SSatish Balay         else {
41857b952d6SSatish Balay           if (mat->was_assembled) {
419905e6a2fSBarry Smith             if (!baij->colmap) {
420905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421905e6a2fSBarry Smith             }
422aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4230f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
424bba1ac68SSatish Balay             col  = col - 1;
42548e59246SSatish Balay #else
426bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
42748e59246SSatish Balay #endif
42857b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
42957b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4308295de27SSatish Balay               col =  in[j];
4319bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4329bf004c3SSatish Balay               B = baij->B;
4339bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4349bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4359bf004c3SSatish Balay               ba=b->a;
436bba1ac68SSatish Balay             } else col += in[j]%bs;
4378295de27SSatish Balay           } else col = in[j];
43857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
43990da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
44090da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44157b952d6SSatish Balay         }
44257b952d6SSatish Balay       }
443d64ed03dSBarry Smith     } else {
44490f02eecSBarry Smith       if (!baij->donotstash) {
445ff2fd236SBarry Smith         if (roworiented) {
4466fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447ff2fd236SBarry Smith         } else {
4486fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
44957b952d6SSatish Balay         }
45057b952d6SSatish Balay       }
45157b952d6SSatish Balay     }
45290f02eecSBarry Smith   }
4533a40ed3dSBarry Smith   PetscFunctionReturn(0);
45457b952d6SSatish Balay }
45557b952d6SSatish Balay 
4564a2ae208SSatish Balay #undef __FUNCT__
4574a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
458f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
459ab26458aSBarry Smith {
460ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
461f15d580aSBarry Smith   const MatScalar *value;
462f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
463273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
464273d9f13SBarry Smith   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
465ab26458aSBarry Smith   int             rend=baij->rend,cstart=baij->cstart,stepval;
466ab26458aSBarry Smith   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
467ab26458aSBarry Smith 
468b16ae2b1SBarry Smith   PetscFunctionBegin;
46930793edcSSatish Balay   if(!barray) {
47082502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47182502324SSatish Balay     baij->barray = barray;
47230793edcSSatish Balay   }
47330793edcSSatish Balay 
474ab26458aSBarry Smith   if (roworiented) {
475ab26458aSBarry Smith     stepval = (n-1)*bs;
476ab26458aSBarry Smith   } else {
477ab26458aSBarry Smith     stepval = (m-1)*bs;
478ab26458aSBarry Smith   }
479ab26458aSBarry Smith   for (i=0; i<m; i++) {
4805ef9f2a5SBarry Smith     if (im[i] < 0) continue;
481aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
482590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
483ab26458aSBarry Smith #endif
484ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
485ab26458aSBarry Smith       row = im[i] - rstart;
486ab26458aSBarry Smith       for (j=0; j<n; j++) {
48715b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
48815b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
489f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
49015b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
491f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
49215b57d14SSatish Balay         } else { /* Here a copy is required */
493ab26458aSBarry Smith           if (roworiented) {
494ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
495ab26458aSBarry Smith           } else {
496ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
497abef11f7SSatish Balay           }
49847513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
49947513183SBarry Smith             for (jj=0; jj<bs; jj++) {
50030793edcSSatish Balay               *barray++  = *value++;
50147513183SBarry Smith             }
50247513183SBarry Smith           }
50330793edcSSatish Balay           barray -=bs2;
50415b57d14SSatish Balay         }
505abef11f7SSatish Balay 
506abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
507abef11f7SSatish Balay           col  = in[j] - cstart;
50893fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509ab26458aSBarry Smith         }
5105ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
511aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
512590ac198SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
513ab26458aSBarry Smith #endif
514ab26458aSBarry Smith         else {
515ab26458aSBarry Smith           if (mat->was_assembled) {
516ab26458aSBarry Smith             if (!baij->colmap) {
517ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
518ab26458aSBarry Smith             }
519a5eb4965SSatish Balay 
520aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
521aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
522fa46199cSSatish Balay             { int data;
5230f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
52429bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
525fa46199cSSatish Balay             }
52648e59246SSatish Balay #else
52729bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
528a5eb4965SSatish Balay #endif
52948e59246SSatish Balay #endif
530aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5310f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532fa46199cSSatish Balay             col  = (col - 1)/bs;
53348e59246SSatish Balay #else
534a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
53548e59246SSatish Balay #endif
536ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
538ab26458aSBarry Smith               col =  in[j];
539ab26458aSBarry Smith             }
540ab26458aSBarry Smith           }
541ab26458aSBarry Smith           else col = in[j];
54293fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543ab26458aSBarry Smith         }
544ab26458aSBarry Smith       }
545d64ed03dSBarry Smith     } else {
546ab26458aSBarry Smith       if (!baij->donotstash) {
547ff2fd236SBarry Smith         if (roworiented) {
5486fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549ff2fd236SBarry Smith         } else {
5506fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551ff2fd236SBarry Smith         }
552abef11f7SSatish Balay       }
553ab26458aSBarry Smith     }
554ab26458aSBarry Smith   }
5553a40ed3dSBarry Smith   PetscFunctionReturn(0);
556ab26458aSBarry Smith }
5576fa18ffdSBarry Smith 
5580bdbc534SSatish Balay #define HASH_KEY 0.6180339887
559c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5606fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5624a2ae208SSatish Balay #undef __FUNCT__
5634a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
564f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
5650bdbc534SSatish Balay {
5660bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
5680bdbc534SSatish Balay   int         ierr,i,j,row,col;
569273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
570c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
571c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
572329f5518SBarry Smith   PetscReal   tmp;
5733eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
574aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5754a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5764a15367fSSatish Balay #endif
5770bdbc534SSatish Balay 
5780bdbc534SSatish Balay   PetscFunctionBegin;
5790bdbc534SSatish Balay 
5800bdbc534SSatish Balay   for (i=0; i<m; i++) {
581aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
58229bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
583590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
5840bdbc534SSatish Balay #endif
5850bdbc534SSatish Balay       row = im[i];
586c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5870bdbc534SSatish Balay       for (j=0; j<n; j++) {
5880bdbc534SSatish Balay         col = in[j];
5896fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5900bdbc534SSatish Balay         /* Look up into the Hash Table */
591c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
592c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5930bdbc534SSatish Balay 
594c2760754SSatish Balay 
595c2760754SSatish Balay         idx = h1;
596aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
597187ce0cbSSatish Balay         insert_ct++;
598187ce0cbSSatish Balay         total_ct++;
599187ce0cbSSatish Balay         if (HT[idx] != key) {
600187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
601187ce0cbSSatish Balay           if (idx == size) {
602187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
603187ce0cbSSatish Balay             if (idx == h1) {
604a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
605187ce0cbSSatish Balay             }
606187ce0cbSSatish Balay           }
607187ce0cbSSatish Balay         }
608187ce0cbSSatish Balay #else
609c2760754SSatish Balay         if (HT[idx] != key) {
610c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
611c2760754SSatish Balay           if (idx == size) {
612c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
613c2760754SSatish Balay             if (idx == h1) {
614a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
615c2760754SSatish Balay             }
616c2760754SSatish Balay           }
617c2760754SSatish Balay         }
618187ce0cbSSatish Balay #endif
619c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
620c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
621c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6220bdbc534SSatish Balay       }
6230bdbc534SSatish Balay     } else {
6240bdbc534SSatish Balay       if (!baij->donotstash) {
625ff2fd236SBarry Smith         if (roworiented) {
6268798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
627ff2fd236SBarry Smith         } else {
6288798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6290bdbc534SSatish Balay         }
6300bdbc534SSatish Balay       }
6310bdbc534SSatish Balay     }
6320bdbc534SSatish Balay   }
633aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
634187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
635187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
636187ce0cbSSatish Balay #endif
6370bdbc534SSatish Balay   PetscFunctionReturn(0);
6380bdbc534SSatish Balay }
6390bdbc534SSatish Balay 
6404a2ae208SSatish Balay #undef __FUNCT__
6414a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
642f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
6430bdbc534SSatish Balay {
6440bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
645273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
6468798bf22SSatish Balay   int             ierr,i,j,ii,jj,row,col;
647273d9f13SBarry Smith   int             rstart=baij->rstart ;
648b4cc0f5aSSatish Balay   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
649c2760754SSatish Balay   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
650329f5518SBarry Smith   PetscReal       tmp;
6513eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
652f15d580aSBarry Smith   const MatScalar *v_t,*value;
653aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6544a15367fSSatish Balay   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6554a15367fSSatish Balay #endif
6560bdbc534SSatish Balay 
657d0a41580SSatish Balay   PetscFunctionBegin;
658d0a41580SSatish Balay 
6590bdbc534SSatish Balay   if (roworiented) {
6600bdbc534SSatish Balay     stepval = (n-1)*bs;
6610bdbc534SSatish Balay   } else {
6620bdbc534SSatish Balay     stepval = (m-1)*bs;
6630bdbc534SSatish Balay   }
6640bdbc534SSatish Balay   for (i=0; i<m; i++) {
665aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
666590ac198SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
667590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
6680bdbc534SSatish Balay #endif
6690bdbc534SSatish Balay     row   = im[i];
670187ce0cbSSatish Balay     v_t   = v + i*bs2;
671c2760754SSatish Balay     if (row >= rstart && row < rend) {
6720bdbc534SSatish Balay       for (j=0; j<n; j++) {
6730bdbc534SSatish Balay         col = in[j];
6740bdbc534SSatish Balay 
6750bdbc534SSatish Balay         /* Look up into the Hash Table */
676c2760754SSatish Balay         key = row*Nbs+col+1;
677c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6780bdbc534SSatish Balay 
679c2760754SSatish Balay         idx = h1;
680aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
681187ce0cbSSatish Balay         total_ct++;
682187ce0cbSSatish Balay         insert_ct++;
683187ce0cbSSatish Balay        if (HT[idx] != key) {
684187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
685187ce0cbSSatish Balay           if (idx == size) {
686187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
687187ce0cbSSatish Balay             if (idx == h1) {
688a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
689187ce0cbSSatish Balay             }
690187ce0cbSSatish Balay           }
691187ce0cbSSatish Balay         }
692187ce0cbSSatish Balay #else
693c2760754SSatish Balay         if (HT[idx] != key) {
694c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
695c2760754SSatish Balay           if (idx == size) {
696c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
697c2760754SSatish Balay             if (idx == h1) {
698a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
699c2760754SSatish Balay             }
700c2760754SSatish Balay           }
701c2760754SSatish Balay         }
702187ce0cbSSatish Balay #endif
703c2760754SSatish Balay         baij_a = HD[idx];
7040bdbc534SSatish Balay         if (roworiented) {
705c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
706187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
707187ce0cbSSatish Balay           value = v_t;
708187ce0cbSSatish Balay           v_t  += bs;
709fef45726SSatish Balay           if (addv == ADD_VALUES) {
710c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
711c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
712fef45726SSatish Balay                 baij_a[jj]  += *value++;
713b4cc0f5aSSatish Balay               }
714b4cc0f5aSSatish Balay             }
715fef45726SSatish Balay           } else {
716c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
717c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
718fef45726SSatish Balay                 baij_a[jj]  = *value++;
719fef45726SSatish Balay               }
720fef45726SSatish Balay             }
721fef45726SSatish Balay           }
7220bdbc534SSatish Balay         } else {
7230bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
724fef45726SSatish Balay           if (addv == ADD_VALUES) {
725b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7260bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
727fef45726SSatish Balay                 baij_a[jj]  += *value++;
728fef45726SSatish Balay               }
729fef45726SSatish Balay             }
730fef45726SSatish Balay           } else {
731fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
732fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
733fef45726SSatish Balay                 baij_a[jj]  = *value++;
734fef45726SSatish Balay               }
735b4cc0f5aSSatish Balay             }
7360bdbc534SSatish Balay           }
7370bdbc534SSatish Balay         }
7380bdbc534SSatish Balay       }
7390bdbc534SSatish Balay     } else {
7400bdbc534SSatish Balay       if (!baij->donotstash) {
7410bdbc534SSatish Balay         if (roworiented) {
7428798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7430bdbc534SSatish Balay         } else {
7448798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7450bdbc534SSatish Balay         }
7460bdbc534SSatish Balay       }
7470bdbc534SSatish Balay     }
7480bdbc534SSatish Balay   }
749aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
750187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
751187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
752187ce0cbSSatish Balay #endif
7530bdbc534SSatish Balay   PetscFunctionReturn(0);
7540bdbc534SSatish Balay }
755133cdb44SSatish Balay 
7564a2ae208SSatish Balay #undef __FUNCT__
7574a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
758f15d580aSBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
759d6de1c52SSatish Balay {
760d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
761d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
76248e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
763d6de1c52SSatish Balay 
764133cdb44SSatish Balay   PetscFunctionBegin;
765d6de1c52SSatish Balay   for (i=0; i<m; i++) {
766590ac198SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
767590ac198SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
768d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
769d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
770d6de1c52SSatish Balay       for (j=0; j<n; j++) {
771590ac198SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
772590ac198SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
773d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
774d6de1c52SSatish Balay           col = idxn[j] - bscstart;
77598dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776d64ed03dSBarry Smith         } else {
777905e6a2fSBarry Smith           if (!baij->colmap) {
778905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
779905e6a2fSBarry Smith           }
780aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7810f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
782fa46199cSSatish Balay           data --;
78348e59246SSatish Balay #else
78448e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
78548e59246SSatish Balay #endif
78648e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
787d9d09a02SSatish Balay           else {
78848e59246SSatish Balay             col  = data + idxn[j]%bs;
78998dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
790d6de1c52SSatish Balay           }
791d6de1c52SSatish Balay         }
792d6de1c52SSatish Balay       }
793d64ed03dSBarry Smith     } else {
79429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
795d6de1c52SSatish Balay     }
796d6de1c52SSatish Balay   }
7973a40ed3dSBarry Smith  PetscFunctionReturn(0);
798d6de1c52SSatish Balay }
799d6de1c52SSatish Balay 
8004a2ae208SSatish Balay #undef __FUNCT__
8014a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
802064f8208SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
803d6de1c52SSatish Balay {
804d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
806acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
807329f5518SBarry Smith   PetscReal  sum = 0.0;
8083eda8832SBarry Smith   MatScalar  *v;
809d6de1c52SSatish Balay 
810d64ed03dSBarry Smith   PetscFunctionBegin;
811d6de1c52SSatish Balay   if (baij->size == 1) {
812064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
813d6de1c52SSatish Balay   } else {
814d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
815d6de1c52SSatish Balay       v = amat->a;
816d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
817aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
818329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819d6de1c52SSatish Balay #else
820d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
821d6de1c52SSatish Balay #endif
822d6de1c52SSatish Balay       }
823d6de1c52SSatish Balay       v = bmat->a;
824d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
825aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
826329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827d6de1c52SSatish Balay #else
828d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
829d6de1c52SSatish Balay #endif
830d6de1c52SSatish Balay       }
831064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
832064f8208SBarry Smith       *nrm = sqrt(*nrm);
833d64ed03dSBarry Smith     } else {
83429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835d6de1c52SSatish Balay     }
836d64ed03dSBarry Smith   }
8373a40ed3dSBarry Smith   PetscFunctionReturn(0);
838d6de1c52SSatish Balay }
83957b952d6SSatish Balay 
840bd7f49f5SSatish Balay 
841fef45726SSatish Balay /*
842fef45726SSatish Balay   Creates the hash table, and sets the table
843fef45726SSatish Balay   This table is created only once.
844fef45726SSatish Balay   If new entried need to be added to the matrix
845fef45726SSatish Balay   then the hash table has to be destroyed and
846fef45726SSatish Balay   recreated.
847fef45726SSatish Balay */
8484a2ae208SSatish Balay #undef __FUNCT__
8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851596b8d2eSBarry Smith {
852596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
854596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8550bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858fef45726SSatish Balay   int         *HT,key;
8593eda8832SBarry Smith   MatScalar   **HD;
860329f5518SBarry Smith   PetscReal   tmp;
861aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8624a15367fSSatish Balay   int         ct=0,max=0;
8634a15367fSSatish Balay #endif
864fef45726SSatish Balay 
865d64ed03dSBarry Smith   PetscFunctionBegin;
8660bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8670bdbc534SSatish Balay   size = baij->ht_size;
868fef45726SSatish Balay 
8690bdbc534SSatish Balay   if (baij->ht) {
8700bdbc534SSatish Balay     PetscFunctionReturn(0);
871596b8d2eSBarry Smith   }
8720bdbc534SSatish Balay 
873fef45726SSatish Balay   /* Allocate Memory for Hash Table */
874b0a32e0cSBarry Smith   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
876b9e4cc15SSatish Balay   HD       = baij->hd;
877a07cd24cSSatish Balay   HT       = baij->ht;
878b9e4cc15SSatish Balay 
879b9e4cc15SSatish Balay 
88087828ca2SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8810bdbc534SSatish Balay 
882596b8d2eSBarry Smith 
883596b8d2eSBarry Smith   /* Loop Over A */
8840bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
885596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8860bdbc534SSatish Balay       row = i+rstart;
8870bdbc534SSatish Balay       col = aj[j]+cstart;
888596b8d2eSBarry Smith 
889187ce0cbSSatish Balay       key = row*Nbs + col + 1;
890c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8910bdbc534SSatish Balay       for (k=0; k<size; k++){
8920bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8930bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8940bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
895596b8d2eSBarry Smith           break;
896aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
897187ce0cbSSatish Balay         } else {
898187ce0cbSSatish Balay           ct++;
899187ce0cbSSatish Balay #endif
900596b8d2eSBarry Smith         }
901187ce0cbSSatish Balay       }
902aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
903187ce0cbSSatish Balay       if (k> max) max = k;
904187ce0cbSSatish Balay #endif
905596b8d2eSBarry Smith     }
906596b8d2eSBarry Smith   }
907596b8d2eSBarry Smith   /* Loop Over B */
9080bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
909596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9100bdbc534SSatish Balay       row = i+rstart;
9110bdbc534SSatish Balay       col = garray[bj[j]];
912187ce0cbSSatish Balay       key = row*Nbs + col + 1;
913c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9140bdbc534SSatish Balay       for (k=0; k<size; k++){
9150bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
9160bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9170bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
918596b8d2eSBarry Smith           break;
919aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
920187ce0cbSSatish Balay         } else {
921187ce0cbSSatish Balay           ct++;
922187ce0cbSSatish Balay #endif
923596b8d2eSBarry Smith         }
924187ce0cbSSatish Balay       }
925aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
926187ce0cbSSatish Balay       if (k> max) max = k;
927187ce0cbSSatish Balay #endif
928596b8d2eSBarry Smith     }
929596b8d2eSBarry Smith   }
930596b8d2eSBarry Smith 
931596b8d2eSBarry Smith   /* Print Summary */
932aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
933c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
934596b8d2eSBarry Smith     if (HT[i]) {j++;}
935c38d4ed2SBarry Smith   }
936b0a32e0cSBarry Smith   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937187ce0cbSSatish Balay #endif
9383a40ed3dSBarry Smith   PetscFunctionReturn(0);
939596b8d2eSBarry Smith }
94057b952d6SSatish Balay 
9414a2ae208SSatish Balay #undef __FUNCT__
9424a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944bbb85fb3SSatish Balay {
945bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
947bbb85fb3SSatish Balay   InsertMode  addv;
948bbb85fb3SSatish Balay 
949bbb85fb3SSatish Balay   PetscFunctionBegin;
950bbb85fb3SSatish Balay   if (baij->donotstash) {
951bbb85fb3SSatish Balay     PetscFunctionReturn(0);
952bbb85fb3SSatish Balay   }
953bbb85fb3SSatish Balay 
954bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
955bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
956bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958bbb85fb3SSatish Balay   }
959bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
960bbb85fb3SSatish Balay 
9618798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9628798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9638798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
964b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
96546680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
966b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967bbb85fb3SSatish Balay   PetscFunctionReturn(0);
968bbb85fb3SSatish Balay }
969bbb85fb3SSatish Balay 
9704a2ae208SSatish Balay #undef __FUNCT__
9714a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
972bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
973bbb85fb3SSatish Balay {
974bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
975a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
976a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9777c922b88SBarry Smith   int         *row,*col,other_disassembled;
9787c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9793eda8832SBarry Smith   MatScalar   *val;
980bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
981bbb85fb3SSatish Balay 
982bbb85fb3SSatish Balay   PetscFunctionBegin;
983bbb85fb3SSatish Balay   if (!baij->donotstash) {
984a2d1c673SSatish Balay     while (1) {
9858798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986a2d1c673SSatish Balay       if (!flg) break;
987a2d1c673SSatish Balay 
988bbb85fb3SSatish Balay       for (i=0; i<n;) {
989bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
990bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
991bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
992bbb85fb3SSatish Balay         else       ncols = n-i;
993bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99493fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
995bbb85fb3SSatish Balay         i = j;
996bbb85fb3SSatish Balay       }
997bbb85fb3SSatish Balay     }
9988798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
999a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1000a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1001a2d1c673SSatish Balay        restore the original flags */
1002a2d1c673SSatish Balay     r1 = baij->roworiented;
1003a2d1c673SSatish Balay     r2 = a->roworiented;
1004a2d1c673SSatish Balay     r3 = b->roworiented;
10057c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10067c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10077c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1008a2d1c673SSatish Balay     while (1) {
10098798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1010a2d1c673SSatish Balay       if (!flg) break;
1011a2d1c673SSatish Balay 
1012a2d1c673SSatish Balay       for (i=0; i<n;) {
1013a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1014a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1015a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1016a2d1c673SSatish Balay         else       ncols = n-i;
101793fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1018a2d1c673SSatish Balay         i = j;
1019a2d1c673SSatish Balay       }
1020a2d1c673SSatish Balay     }
10218798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1022a2d1c673SSatish Balay     baij->roworiented = r1;
1023a2d1c673SSatish Balay     a->roworiented    = r2;
1024a2d1c673SSatish Balay     b->roworiented    = r3;
1025bbb85fb3SSatish Balay   }
1026bbb85fb3SSatish Balay 
1027bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1028bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1029bbb85fb3SSatish Balay 
1030bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1031bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1032bbb85fb3SSatish Balay   /*
1033bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1034bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1035bbb85fb3SSatish Balay   */
1036bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1037bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1038bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1039bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1040bbb85fb3SSatish Balay     }
1041bbb85fb3SSatish Balay   }
1042bbb85fb3SSatish Balay 
1043bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1044bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1045bbb85fb3SSatish Balay   }
1046bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1047bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1048bbb85fb3SSatish Balay 
1049aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1050bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1051f6275e2eSBarry Smith     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1052bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1053bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1054bbb85fb3SSatish Balay   }
1055bbb85fb3SSatish Balay #endif
1056bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1057bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1058bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1059bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1060bbb85fb3SSatish Balay   }
1061bbb85fb3SSatish Balay 
1062606d414cSSatish Balay   if (baij->rowvalues) {
1063606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1064606d414cSSatish Balay     baij->rowvalues = 0;
1065606d414cSSatish Balay   }
1066bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1067bbb85fb3SSatish Balay }
106857b952d6SSatish Balay 
10694a2ae208SSatish Balay #undef __FUNCT__
10704a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1071b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107257b952d6SSatish Balay {
107357b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1074fb9695e5SSatish Balay   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
10756831982aSBarry Smith   PetscTruth        isascii,isdraw;
1076b0a32e0cSBarry Smith   PetscViewer       sviewer;
1077f3ef73ceSBarry Smith   PetscViewerFormat format;
107857b952d6SSatish Balay 
1079d64ed03dSBarry Smith   PetscFunctionBegin;
1080f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1081b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1082fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
10830f5bd95cSBarry Smith   if (isascii) {
1084b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1085456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
10864e220ebcSLois Curfman McInnes       MatInfo info;
1087d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1088d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1089b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1090273d9f13SBarry Smith               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10916831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1092d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1093b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1094d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1095b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1096b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
109757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10983a40ed3dSBarry Smith       PetscFunctionReturn(0);
1099fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1100b0a32e0cSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
11013a40ed3dSBarry Smith       PetscFunctionReturn(0);
110204929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
110304929863SHong Zhang       PetscFunctionReturn(0);
110457b952d6SSatish Balay     }
110557b952d6SSatish Balay   }
110657b952d6SSatish Balay 
11070f5bd95cSBarry Smith   if (isdraw) {
1108b0a32e0cSBarry Smith     PetscDraw       draw;
110957b952d6SSatish Balay     PetscTruth isnull;
1110b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
111257b952d6SSatish Balay   }
111357b952d6SSatish Balay 
111457b952d6SSatish Balay   if (size == 1) {
1115873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
111657b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1117d64ed03dSBarry Smith   } else {
111857b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
111957b952d6SSatish Balay     Mat         A;
112057b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1121273d9f13SBarry Smith     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11223eda8832SBarry Smith     MatScalar   *a;
112357b952d6SSatish Balay 
1124f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1125f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
112657b952d6SSatish Balay     if (!rank) {
1127f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1128d64ed03dSBarry Smith     } else {
1129f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
113057b952d6SSatish Balay     }
1131f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1132f204ca49SKris Buschelman     ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1133b0a32e0cSBarry Smith     PetscLogObjectParent(mat,A);
113457b952d6SSatish Balay 
113557b952d6SSatish Balay     /* copy over the A part */
113657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
113757b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
113882502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
113957b952d6SSatish Balay 
114057b952d6SSatish Balay     for (i=0; i<mbs; i++) {
114157b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
114257b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
114357b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
114457b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
114557b952d6SSatish Balay         for (k=0; k<bs; k++) {
114693fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1147cee3aa6bSSatish Balay           col++; a += bs;
114857b952d6SSatish Balay         }
114957b952d6SSatish Balay       }
115057b952d6SSatish Balay     }
115157b952d6SSatish Balay     /* copy over the B part */
115257b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
115357b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
115457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
115557b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
115657b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
115857b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
115957b952d6SSatish Balay         for (k=0; k<bs; k++) {
116093fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1161cee3aa6bSSatish Balay           col++; a += bs;
116257b952d6SSatish Balay         }
116357b952d6SSatish Balay       }
116457b952d6SSatish Balay     }
1165606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11666d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11676d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
116855843e3eSBarry Smith     /*
116955843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1170b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
117155843e3eSBarry Smith     */
1172b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1173f14a1c24SBarry Smith     if (!rank) {
1174e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
11756831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
117657b952d6SSatish Balay     }
1177b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
117857b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
117957b952d6SSatish Balay   }
11803a40ed3dSBarry Smith   PetscFunctionReturn(0);
118157b952d6SSatish Balay }
118257b952d6SSatish Balay 
11834a2ae208SSatish Balay #undef __FUNCT__
11844a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1185b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
118657b952d6SSatish Balay {
118757b952d6SSatish Balay   int        ierr;
11886831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
118957b952d6SSatish Balay 
1190d64ed03dSBarry Smith   PetscFunctionBegin;
1191b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1192fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1193b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1194fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
11950f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11967b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11975cd90555SBarry Smith   } else {
119829bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
119957b952d6SSatish Balay   }
12003a40ed3dSBarry Smith   PetscFunctionReturn(0);
120157b952d6SSatish Balay }
120257b952d6SSatish Balay 
12034a2ae208SSatish Balay #undef __FUNCT__
12044a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1205e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
120679bdfe76SSatish Balay {
120779bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
120879bdfe76SSatish Balay   int         ierr;
120979bdfe76SSatish Balay 
1210d64ed03dSBarry Smith   PetscFunctionBegin;
1211aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1212b0a32e0cSBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
121379bdfe76SSatish Balay #endif
12148798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12158798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1216606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
121779bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
121879bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1219aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12200f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
122148e59246SSatish Balay #else
1222606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
122348e59246SSatish Balay #endif
1224606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1225606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1226606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1227606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1228606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1229606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12306fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12316fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12326fa18ffdSBarry Smith #endif
1233606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
12343a40ed3dSBarry Smith   PetscFunctionReturn(0);
123579bdfe76SSatish Balay }
123679bdfe76SSatish Balay 
12374a2ae208SSatish Balay #undef __FUNCT__
12384a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1239ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1240cee3aa6bSSatish Balay {
1241cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
124247b4a8eaSLois Curfman McInnes   int         ierr,nt;
1243cee3aa6bSSatish Balay 
1244d64ed03dSBarry Smith   PetscFunctionBegin;
1245e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1246273d9f13SBarry Smith   if (nt != A->n) {
124729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
124847b4a8eaSLois Curfman McInnes   }
1249e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1250273d9f13SBarry Smith   if (nt != A->m) {
125129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
125247b4a8eaSLois Curfman McInnes   }
125343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1254f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
125543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1256f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
125743a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1259cee3aa6bSSatish Balay }
1260cee3aa6bSSatish Balay 
12614a2ae208SSatish Balay #undef __FUNCT__
12624a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1263ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1264cee3aa6bSSatish Balay {
1265cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1266cee3aa6bSSatish Balay   int        ierr;
1267d64ed03dSBarry Smith 
1268d64ed03dSBarry Smith   PetscFunctionBegin;
126943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
127143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12733a40ed3dSBarry Smith   PetscFunctionReturn(0);
1274cee3aa6bSSatish Balay }
1275cee3aa6bSSatish Balay 
12764a2ae208SSatish Balay #undef __FUNCT__
12774a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
12787c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1279cee3aa6bSSatish Balay {
1280cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1281cee3aa6bSSatish Balay   int         ierr;
1282cee3aa6bSSatish Balay 
1283d64ed03dSBarry Smith   PetscFunctionBegin;
1284cee3aa6bSSatish Balay   /* do nondiagonal part */
12857c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1286cee3aa6bSSatish Balay   /* send it on its way */
1287537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1288cee3aa6bSSatish Balay   /* do local part */
12897c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1290cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1291cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1292cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1293639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12943a40ed3dSBarry Smith   PetscFunctionReturn(0);
1295cee3aa6bSSatish Balay }
1296cee3aa6bSSatish Balay 
12974a2ae208SSatish Balay #undef __FUNCT__
12984a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
12997c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1300cee3aa6bSSatish Balay {
1301cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1302cee3aa6bSSatish Balay   int         ierr;
1303cee3aa6bSSatish Balay 
1304d64ed03dSBarry Smith   PetscFunctionBegin;
1305cee3aa6bSSatish Balay   /* do nondiagonal part */
13067c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1307cee3aa6bSSatish Balay   /* send it on its way */
1308537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1309cee3aa6bSSatish Balay   /* do local part */
13107c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1311cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1312cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1313cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1314537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13153a40ed3dSBarry Smith   PetscFunctionReturn(0);
1316cee3aa6bSSatish Balay }
1317cee3aa6bSSatish Balay 
1318cee3aa6bSSatish Balay /*
1319cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1320cee3aa6bSSatish Balay    diagonal block
1321cee3aa6bSSatish Balay */
13224a2ae208SSatish Balay #undef __FUNCT__
13234a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1324ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1325cee3aa6bSSatish Balay {
1326cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13273a40ed3dSBarry Smith   int         ierr;
1328d64ed03dSBarry Smith 
1329d64ed03dSBarry Smith   PetscFunctionBegin;
1330273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13313a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13323a40ed3dSBarry Smith   PetscFunctionReturn(0);
1333cee3aa6bSSatish Balay }
1334cee3aa6bSSatish Balay 
13354a2ae208SSatish Balay #undef __FUNCT__
13364a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1337268466fbSBarry Smith int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1338cee3aa6bSSatish Balay {
1339cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1340cee3aa6bSSatish Balay   int         ierr;
1341d64ed03dSBarry Smith 
1342d64ed03dSBarry Smith   PetscFunctionBegin;
1343cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1344cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13453a40ed3dSBarry Smith   PetscFunctionReturn(0);
1346cee3aa6bSSatish Balay }
1347026e39d0SSatish Balay 
13484a2ae208SSatish Balay #undef __FUNCT__
13494a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
135087828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1351acdf5bf4SSatish Balay {
1352acdf5bf4SSatish Balay   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
135387828ca2SBarry Smith   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1354acdf5bf4SSatish Balay   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1355d9d09a02SSatish Balay   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1356d9d09a02SSatish Balay   int          *cmap,*idx_p,cstart = mat->cstart;
1357acdf5bf4SSatish Balay 
1358d64ed03dSBarry Smith   PetscFunctionBegin;
135929bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1360acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1361acdf5bf4SSatish Balay 
1362acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1363acdf5bf4SSatish Balay     /*
1364acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1365acdf5bf4SSatish Balay     */
1366acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1367bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1368bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1369acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1370acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1371acdf5bf4SSatish Balay     }
137287828ca2SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1373acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1374acdf5bf4SSatish Balay   }
1375acdf5bf4SSatish Balay 
137629bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1377d9d09a02SSatish Balay   lrow = row - brstart;
1378acdf5bf4SSatish Balay 
1379acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1380acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1381acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1382f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1383f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1384acdf5bf4SSatish Balay   nztot = nzA + nzB;
1385acdf5bf4SSatish Balay 
1386acdf5bf4SSatish Balay   cmap  = mat->garray;
1387acdf5bf4SSatish Balay   if (v  || idx) {
1388acdf5bf4SSatish Balay     if (nztot) {
1389acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1390acdf5bf4SSatish Balay       int imark = -1;
1391acdf5bf4SSatish Balay       if (v) {
1392acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1393acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1394d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1395acdf5bf4SSatish Balay           else break;
1396acdf5bf4SSatish Balay         }
1397acdf5bf4SSatish Balay         imark = i;
1398acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1399acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1400acdf5bf4SSatish Balay       }
1401acdf5bf4SSatish Balay       if (idx) {
1402acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1403acdf5bf4SSatish Balay         if (imark > -1) {
1404acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1405bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1406acdf5bf4SSatish Balay           }
1407acdf5bf4SSatish Balay         } else {
1408acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1409d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1410d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1411acdf5bf4SSatish Balay             else break;
1412acdf5bf4SSatish Balay           }
1413acdf5bf4SSatish Balay           imark = i;
1414acdf5bf4SSatish Balay         }
1415d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1416d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1417acdf5bf4SSatish Balay       }
1418d64ed03dSBarry Smith     } else {
1419d212a18eSSatish Balay       if (idx) *idx = 0;
1420d212a18eSSatish Balay       if (v)   *v   = 0;
1421d212a18eSSatish Balay     }
1422acdf5bf4SSatish Balay   }
1423acdf5bf4SSatish Balay   *nz = nztot;
1424f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1425f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14263a40ed3dSBarry Smith   PetscFunctionReturn(0);
1427acdf5bf4SSatish Balay }
1428acdf5bf4SSatish Balay 
14294a2ae208SSatish Balay #undef __FUNCT__
14304a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
143187828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1432acdf5bf4SSatish Balay {
1433acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1434d64ed03dSBarry Smith 
1435d64ed03dSBarry Smith   PetscFunctionBegin;
1436acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
143729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1438acdf5bf4SSatish Balay   }
1439acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14403a40ed3dSBarry Smith   PetscFunctionReturn(0);
1441acdf5bf4SSatish Balay }
1442acdf5bf4SSatish Balay 
14434a2ae208SSatish Balay #undef __FUNCT__
14444a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1445ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14465a838052SSatish Balay {
14475a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1448d64ed03dSBarry Smith 
1449d64ed03dSBarry Smith   PetscFunctionBegin;
14505a838052SSatish Balay   *bs = baij->bs;
14513a40ed3dSBarry Smith   PetscFunctionReturn(0);
14525a838052SSatish Balay }
14535a838052SSatish Balay 
14544a2ae208SSatish Balay #undef __FUNCT__
14554a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1456ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
145758667388SSatish Balay {
145858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
145958667388SSatish Balay   int         ierr;
1460d64ed03dSBarry Smith 
1461d64ed03dSBarry Smith   PetscFunctionBegin;
146258667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
146358667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14643a40ed3dSBarry Smith   PetscFunctionReturn(0);
146558667388SSatish Balay }
14660ac07820SSatish Balay 
14674a2ae208SSatish Balay #undef __FUNCT__
14684a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1469ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14700ac07820SSatish Balay {
14714e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14724e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14737d57db60SLois Curfman McInnes   int         ierr;
1474329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14750ac07820SSatish Balay 
1476d64ed03dSBarry Smith   PetscFunctionBegin;
1477f6275e2eSBarry Smith   info->block_size     = (PetscReal)a->bs;
14784e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14790e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1480de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14814e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14820e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1483de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14840ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14854e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14864e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14874e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14884e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14894e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14900ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1491d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
14924e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14934e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14944e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14954e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14964e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14970ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1498d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
14994e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15004e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15014e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15024e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15034e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1504d41123aaSBarry Smith   } else {
150529bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15060ac07820SSatish Balay   }
1507f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1508f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1509f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1510f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
15114e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15124e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15134e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15143a40ed3dSBarry Smith   PetscFunctionReturn(0);
15150ac07820SSatish Balay }
15160ac07820SSatish Balay 
15174a2ae208SSatish Balay #undef __FUNCT__
15184a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1519ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
152058667388SSatish Balay {
152158667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
152298305bb5SBarry Smith   int         ierr;
152358667388SSatish Balay 
1524d64ed03dSBarry Smith   PetscFunctionBegin;
152512c028f9SKris Buschelman   switch (op) {
152612c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
152712c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
152812c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
152912c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
153012c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
153112c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
153212c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
153398305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153498305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
153512c028f9SKris Buschelman     break;
153612c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15377c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
153898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
154012c028f9SKris Buschelman     break;
154112c028f9SKris Buschelman   case MAT_ROWS_SORTED:
154212c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
154312c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
1544b0a32e0cSBarry Smith     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
154512c028f9SKris Buschelman     break;
154612c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
15477c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
154898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
155012c028f9SKris Buschelman     break;
155112c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15527c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
155312c028f9SKris Buschelman     break;
155412c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
155529bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
155612c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15577c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
155812c028f9SKris Buschelman     break;
155977e54ba9SKris Buschelman   case MAT_SYMMETRIC:
156077e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
1561*2188ac68SBarry Smith   case MAT_HERMITIAN:
1562*2188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1563*2188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1564*2188ac68SBarry Smith     break;
15659a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
15669a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
15679a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
15689a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
156977e54ba9SKris Buschelman     break;
157012c028f9SKris Buschelman   default:
157129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1572d64ed03dSBarry Smith   }
15733a40ed3dSBarry Smith   PetscFunctionReturn(0);
157458667388SSatish Balay }
157558667388SSatish Balay 
15764a2ae208SSatish Balay #undef __FUNCT__
15774a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1578ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15790ac07820SSatish Balay {
15800ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15810ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15820ac07820SSatish Balay   Mat         B;
1583273d9f13SBarry Smith   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
15840ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15853eda8832SBarry Smith   MatScalar   *a;
15860ac07820SSatish Balay 
1587d64ed03dSBarry Smith   PetscFunctionBegin;
158829bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1589f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1590f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1591f204ca49SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
15920ac07820SSatish Balay 
15930ac07820SSatish Balay   /* copy over the A part */
15940ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15950ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
159682502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
15970ac07820SSatish Balay 
15980ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15990ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16000ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16010ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16020ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16030ac07820SSatish Balay       for (k=0; k<bs; k++) {
160493fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16050ac07820SSatish Balay         col++; a += bs;
16060ac07820SSatish Balay       }
16070ac07820SSatish Balay     }
16080ac07820SSatish Balay   }
16090ac07820SSatish Balay   /* copy over the B part */
16100ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16110ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16120ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16130ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16140ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16150ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16160ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16170ac07820SSatish Balay       for (k=0; k<bs; k++) {
161893fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16190ac07820SSatish Balay         col++; a += bs;
16200ac07820SSatish Balay       }
16210ac07820SSatish Balay     }
16220ac07820SSatish Balay   }
1623606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16240ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16250ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16260ac07820SSatish Balay 
16277c922b88SBarry Smith   if (matout) {
16280ac07820SSatish Balay     *matout = B;
16290ac07820SSatish Balay   } else {
1630273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16310ac07820SSatish Balay   }
16323a40ed3dSBarry Smith   PetscFunctionReturn(0);
16330ac07820SSatish Balay }
16340e95ebc0SSatish Balay 
16354a2ae208SSatish Balay #undef __FUNCT__
16364a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
163736c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16380e95ebc0SSatish Balay {
163936c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
164036c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16410e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16420e95ebc0SSatish Balay 
1643d64ed03dSBarry Smith   PetscFunctionBegin;
164436c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
164536c4a09eSSatish Balay   if (rr) {
164636c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
164729bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
164836c4a09eSSatish Balay     /* Overlap communication with computation. */
164936c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
165036c4a09eSSatish Balay   }
16510e95ebc0SSatish Balay   if (ll) {
16520e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
165329bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1654a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16550e95ebc0SSatish Balay   }
165636c4a09eSSatish Balay   /* scale  the diagonal block */
165736c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
165836c4a09eSSatish Balay 
165936c4a09eSSatish Balay   if (rr) {
166036c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
166136c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1662a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
166336c4a09eSSatish Balay   }
166436c4a09eSSatish Balay 
16653a40ed3dSBarry Smith   PetscFunctionReturn(0);
16660e95ebc0SSatish Balay }
16670e95ebc0SSatish Balay 
16684a2ae208SSatish Balay #undef __FUNCT__
16694a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1670268466fbSBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
16710ac07820SSatish Balay {
16720ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16730ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1674c1dc657dSBarry Smith   int            *nprocs,j,idx,nsends,row;
16750ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16760ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1677a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16780ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16790ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16800ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16810ac07820SSatish Balay   IS             istmp;
168235d8aa7fSBarry Smith   PetscTruth     found;
16830ac07820SSatish Balay 
1684d64ed03dSBarry Smith   PetscFunctionBegin;
1685f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
16860ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16870ac07820SSatish Balay 
16880ac07820SSatish Balay   /*  first count number of contributors to each processor */
168982502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1690549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1691b0a32e0cSBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
16920ac07820SSatish Balay   for (i=0; i<N; i++) {
16930ac07820SSatish Balay     idx   = rows[i];
169435d8aa7fSBarry Smith     found = PETSC_FALSE;
16950ac07820SSatish Balay     for (j=0; j<size; j++) {
16960ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1697c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
16980ac07820SSatish Balay       }
16990ac07820SSatish Balay     }
170029bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17010ac07820SSatish Balay   }
1702c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
17030ac07820SSatish Balay 
17040ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1705c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
17060ac07820SSatish Balay 
17070ac07820SSatish Balay   /* post receives:   */
1708b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1709b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
17100ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1711ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17120ac07820SSatish Balay   }
17130ac07820SSatish Balay 
17140ac07820SSatish Balay   /* do sends:
17150ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17160ac07820SSatish Balay      the ith processor
17170ac07820SSatish Balay   */
1718b0a32e0cSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1719b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1720b0a32e0cSBarry Smith   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
17210ac07820SSatish Balay   starts[0]  = 0;
1722c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17230ac07820SSatish Balay   for (i=0; i<N; i++) {
17240ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17250ac07820SSatish Balay   }
17266831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17270ac07820SSatish Balay 
17280ac07820SSatish Balay   starts[0] = 0;
1729c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17300ac07820SSatish Balay   count = 0;
17310ac07820SSatish Balay   for (i=0; i<size; i++) {
1732c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1733c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17340ac07820SSatish Balay     }
17350ac07820SSatish Balay   }
1736606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17370ac07820SSatish Balay 
17380ac07820SSatish Balay   base = owners[rank]*bs;
17390ac07820SSatish Balay 
17400ac07820SSatish Balay   /*  wait on receives */
1741b0a32e0cSBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
17420ac07820SSatish Balay   source = lens + nrecvs;
17430ac07820SSatish Balay   count  = nrecvs; slen = 0;
17440ac07820SSatish Balay   while (count) {
1745ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17460ac07820SSatish Balay     /* unpack receives into our local space */
1747ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17480ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17490ac07820SSatish Balay     lens[imdex]    = n;
17500ac07820SSatish Balay     slen          += n;
17510ac07820SSatish Balay     count--;
17520ac07820SSatish Balay   }
1753606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17540ac07820SSatish Balay 
17550ac07820SSatish Balay   /* move the data into the send scatter */
1756b0a32e0cSBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
17570ac07820SSatish Balay   count = 0;
17580ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17590ac07820SSatish Balay     values = rvalues + i*nmax;
17600ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17610ac07820SSatish Balay       lrows[count++] = values[j] - base;
17620ac07820SSatish Balay     }
17630ac07820SSatish Balay   }
1764606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1765606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1766606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1767606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17680ac07820SSatish Balay 
17690ac07820SSatish Balay   /* actually zap the local rows */
1770029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1771b0a32e0cSBarry Smith   PetscLogObjectParent(A,istmp);
1772a07cd24cSSatish Balay 
177372dacd9aSBarry Smith   /*
177472dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
177572dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
177672dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
177772dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
177872dacd9aSBarry Smith 
177972dacd9aSBarry Smith        Contributed by: Mathew Knepley
178072dacd9aSBarry Smith   */
17819c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17826fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
17839c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17846fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17859c957beeSSatish Balay   } else if (diag) {
17866fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1787fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
178829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1789fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17906525c446SSatish Balay     }
1791a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1792a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17933f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1794a07cd24cSSatish Balay     }
1795a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17979c957beeSSatish Balay   } else {
17986fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1799a07cd24cSSatish Balay   }
18009c957beeSSatish Balay 
18019c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1802606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1803a07cd24cSSatish Balay 
18040ac07820SSatish Balay   /* wait on sends */
18050ac07820SSatish Balay   if (nsends) {
180682502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1807ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1808606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
18090ac07820SSatish Balay   }
1810606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1811606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18120ac07820SSatish Balay 
18133a40ed3dSBarry Smith   PetscFunctionReturn(0);
18140ac07820SSatish Balay }
181572dacd9aSBarry Smith 
18164a2ae208SSatish Balay #undef __FUNCT__
18174a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1818ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1819ba4ca20aSSatish Balay {
1820ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
182125fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1822133cdb44SSatish Balay   static int  called = 0;
18233a40ed3dSBarry Smith   int         ierr;
1824ba4ca20aSSatish Balay 
1825d64ed03dSBarry Smith   PetscFunctionBegin;
18263a40ed3dSBarry Smith   if (!a->rank) {
18273a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
182825fdafccSSatish Balay   }
182925fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1830d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1831d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18323a40ed3dSBarry Smith   PetscFunctionReturn(0);
1833ba4ca20aSSatish Balay }
18340ac07820SSatish Balay 
18354a2ae208SSatish Balay #undef __FUNCT__
18364a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1837ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1838bb5a7306SBarry Smith {
1839bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1840bb5a7306SBarry Smith   int         ierr;
1841d64ed03dSBarry Smith 
1842d64ed03dSBarry Smith   PetscFunctionBegin;
1843bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1845bb5a7306SBarry Smith }
1846bb5a7306SBarry Smith 
18472e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18480ac07820SSatish Balay 
18494a2ae208SSatish Balay #undef __FUNCT__
18504a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
18517fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18527fc3c18eSBarry Smith {
18537fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18547fc3c18eSBarry Smith   Mat         a,b,c,d;
18557fc3c18eSBarry Smith   PetscTruth  flg;
18567fc3c18eSBarry Smith   int         ierr;
18577fc3c18eSBarry Smith 
18587fc3c18eSBarry Smith   PetscFunctionBegin;
18597fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18607fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18617fc3c18eSBarry Smith 
18627fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18637fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18647fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18657fc3c18eSBarry Smith   }
18667fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18677fc3c18eSBarry Smith   PetscFunctionReturn(0);
18687fc3c18eSBarry Smith }
18697fc3c18eSBarry Smith 
1870273d9f13SBarry Smith 
18714a2ae208SSatish Balay #undef __FUNCT__
18724a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1873273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A)
1874273d9f13SBarry Smith {
1875273d9f13SBarry Smith   int        ierr;
1876273d9f13SBarry Smith 
1877273d9f13SBarry Smith   PetscFunctionBegin;
1878273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1879273d9f13SBarry Smith   PetscFunctionReturn(0);
1880273d9f13SBarry Smith }
1881273d9f13SBarry Smith 
188279bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1883cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1884cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1885cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1886cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1887cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
188897304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
18897c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
18907c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1891cc2dc46cSBarry Smith        0,
1892cc2dc46cSBarry Smith        0,
1893cc2dc46cSBarry Smith        0,
189497304618SKris Buschelman /*10*/ 0,
1895cc2dc46cSBarry Smith        0,
1896cc2dc46cSBarry Smith        0,
1897cc2dc46cSBarry Smith        0,
1898cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
189997304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
19007fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1901cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1902cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1903cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
190497304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
1905cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
1906cc2dc46cSBarry Smith        0,
1907cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
1908cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
190997304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
1910cc2dc46cSBarry Smith        0,
1911cc2dc46cSBarry Smith        0,
1912cc2dc46cSBarry Smith        0,
1913cc2dc46cSBarry Smith        0,
191497304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
1915273d9f13SBarry Smith        0,
1916cc2dc46cSBarry Smith        0,
1917cc2dc46cSBarry Smith        0,
1918cc2dc46cSBarry Smith        0,
191997304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
1920cc2dc46cSBarry Smith        0,
1921cc2dc46cSBarry Smith        0,
1922cc2dc46cSBarry Smith        0,
1923cc2dc46cSBarry Smith        0,
192497304618SKris Buschelman /*40*/ 0,
1925cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
1926cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
1927cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
1928cc2dc46cSBarry Smith        0,
192997304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
1930cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
1931cc2dc46cSBarry Smith        0,
1932cc2dc46cSBarry Smith        0,
1933cc2dc46cSBarry Smith        0,
193497304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ,
1935cc2dc46cSBarry Smith        0,
1936cc2dc46cSBarry Smith        0,
1937cc2dc46cSBarry Smith        0,
1938cc2dc46cSBarry Smith        0,
193997304618SKris Buschelman /*55*/ 0,
1940cc2dc46cSBarry Smith        0,
1941cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
1942cc2dc46cSBarry Smith        0,
1943cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
194497304618SKris Buschelman /*60*/ 0,
1945f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
1946f14a1c24SBarry Smith        MatView_MPIBAIJ,
19478a124369SBarry Smith        MatGetPetscMaps_Petsc,
19487843d17aSBarry Smith        0,
194997304618SKris Buschelman /*65*/ 0,
19507843d17aSBarry Smith        0,
19517843d17aSBarry Smith        0,
19527843d17aSBarry Smith        0,
19537843d17aSBarry Smith        0,
195497304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
19557843d17aSBarry Smith        0,
195697304618SKris Buschelman        0,
195797304618SKris Buschelman        0,
195897304618SKris Buschelman        0,
195997304618SKris Buschelman /*75*/ 0,
196097304618SKris Buschelman        0,
196197304618SKris Buschelman        0,
196297304618SKris Buschelman        0,
196397304618SKris Buschelman        0,
196497304618SKris Buschelman /*80*/ 0,
196597304618SKris Buschelman        0,
196697304618SKris Buschelman        0,
196797304618SKris Buschelman        0,
196897304618SKris Buschelman /*85*/ MatLoad_MPIBAIJ
196997304618SKris Buschelman };
197079bdfe76SSatish Balay 
19715ef9f2a5SBarry Smith 
1972e18c124aSSatish Balay EXTERN_C_BEGIN
19734a2ae208SSatish Balay #undef __FUNCT__
19744a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
19755ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19765ef9f2a5SBarry Smith {
19775ef9f2a5SBarry Smith   PetscFunctionBegin;
19785ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19795ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19805ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19815ef9f2a5SBarry Smith }
1982e18c124aSSatish Balay EXTERN_C_END
198379bdfe76SSatish Balay 
1984273d9f13SBarry Smith EXTERN_C_BEGIN
1985d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1986d94109b8SHong Zhang EXTERN_C_END
1987d94109b8SHong Zhang 
1988d94109b8SHong Zhang EXTERN_C_BEGIN
19894a2ae208SSatish Balay #undef __FUNCT__
1990a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1991a23d5eceSKris Buschelman int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1992a23d5eceSKris Buschelman {
1993a23d5eceSKris Buschelman   Mat_MPIBAIJ  *b;
1994a23d5eceSKris Buschelman   int          ierr,i;
1995a23d5eceSKris Buschelman 
1996a23d5eceSKris Buschelman   PetscFunctionBegin;
1997a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
1998a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1999a23d5eceSKris Buschelman 
2000a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2001a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2002a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2003a23d5eceSKris Buschelman   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2004a23d5eceSKris Buschelman   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2005a23d5eceSKris Buschelman   if (d_nnz) {
2006a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
2007a23d5eceSKris Buschelman       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2008a23d5eceSKris Buschelman     }
2009a23d5eceSKris Buschelman   }
2010a23d5eceSKris Buschelman   if (o_nnz) {
2011a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
2012a23d5eceSKris Buschelman       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2013a23d5eceSKris Buschelman     }
2014a23d5eceSKris Buschelman   }
2015a23d5eceSKris Buschelman 
2016a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2017a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2018a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2019a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2020a23d5eceSKris Buschelman 
2021a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2022a23d5eceSKris Buschelman   b->bs  = bs;
2023a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2024a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2025a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2026a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2027a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2028a23d5eceSKris Buschelman 
2029a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2030a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2031a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2032a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2033a23d5eceSKris Buschelman   }
2034a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2035a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2036a23d5eceSKris Buschelman 
2037a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2038a23d5eceSKris Buschelman   b->cowners[0] = 0;
2039a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2040a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2041a23d5eceSKris Buschelman   }
2042a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2043a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2044a23d5eceSKris Buschelman 
2045a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2046a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2047a23d5eceSKris Buschelman   }
2048a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2049a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2050a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2051a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2052a23d5eceSKris Buschelman 
20539c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
20549c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2055c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
20569c097c71SKris Buschelman   PetscLogObjectParent(B,b->A);
20579c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
20589c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2059c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
20609c097c71SKris Buschelman   PetscLogObjectParent(B,b->B);
2061c60e587dSKris Buschelman 
2062a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2063a23d5eceSKris Buschelman 
2064a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2065a23d5eceSKris Buschelman }
2066a23d5eceSKris Buschelman EXTERN_C_END
2067a23d5eceSKris Buschelman 
2068a23d5eceSKris Buschelman EXTERN_C_BEGIN
206992b32695SKris Buschelman EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
20705bf65638SKris Buschelman EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
207192b32695SKris Buschelman EXTERN_C_END
20725bf65638SKris Buschelman 
20730bad9183SKris Buschelman /*MC
2074fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
20750bad9183SKris Buschelman 
20760bad9183SKris Buschelman    Options Database Keys:
20770bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
20780bad9183SKris Buschelman 
20790bad9183SKris Buschelman   Level: beginner
20800bad9183SKris Buschelman 
20810bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
20820bad9183SKris Buschelman M*/
20830bad9183SKris Buschelman 
208492b32695SKris Buschelman EXTERN_C_BEGIN
2085a23d5eceSKris Buschelman #undef __FUNCT__
20864a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2087273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B)
2088273d9f13SBarry Smith {
2089273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
2090cfce73b9SSatish Balay   int          ierr;
2091273d9f13SBarry Smith   PetscTruth   flg;
2092273d9f13SBarry Smith 
2093273d9f13SBarry Smith   PetscFunctionBegin;
209482502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
209582502324SSatish Balay   B->data = (void*)b;
209682502324SSatish Balay 
2097273d9f13SBarry Smith   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2098273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2099273d9f13SBarry Smith   B->mapping    = 0;
2100273d9f13SBarry Smith   B->factor     = 0;
2101273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2102273d9f13SBarry Smith 
2103273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2104273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2105273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2106273d9f13SBarry Smith 
2107273d9f13SBarry Smith   /* build local table of row and column ownerships */
210882502324SSatish Balay   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2109b0a32e0cSBarry Smith   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2110273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2111273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2112273d9f13SBarry Smith 
2113273d9f13SBarry Smith   /* build cache for off array entries formed */
2114273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2115273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2116273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2117273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2118273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2119273d9f13SBarry Smith 
2120cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2121273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
212264a35ccbSBarry Smith   b->setvalueslen     = 0;
2123273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2124273d9f13SBarry Smith #endif
2125273d9f13SBarry Smith 
2126273d9f13SBarry Smith   /* stuff used in block assembly */
2127273d9f13SBarry Smith   b->barray       = 0;
2128273d9f13SBarry Smith 
2129273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2130273d9f13SBarry Smith   b->lvec         = 0;
2131273d9f13SBarry Smith   b->Mvctx        = 0;
2132273d9f13SBarry Smith 
2133273d9f13SBarry Smith   /* stuff for MatGetRow() */
2134273d9f13SBarry Smith   b->rowindices   = 0;
2135273d9f13SBarry Smith   b->rowvalues    = 0;
2136273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2137273d9f13SBarry Smith 
2138273d9f13SBarry Smith   /* hash table stuff */
2139273d9f13SBarry Smith   b->ht           = 0;
2140273d9f13SBarry Smith   b->hd           = 0;
2141273d9f13SBarry Smith   b->ht_size      = 0;
2142273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2143273d9f13SBarry Smith   b->ht_fact      = 0;
2144273d9f13SBarry Smith   b->ht_total_ct  = 0;
2145273d9f13SBarry Smith   b->ht_insert_ct = 0;
2146273d9f13SBarry Smith 
2147b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2148273d9f13SBarry Smith   if (flg) {
2149f6275e2eSBarry Smith     PetscReal fact = 1.39;
2150273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
215187828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2152273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2153273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2154b0a32e0cSBarry Smith     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2155273d9f13SBarry Smith   }
2156273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2157273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2158273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2159273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2160273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2161273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2162273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2163273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2164273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2165a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2166a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2167a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
216892b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
216992b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
217092b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
21715bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
21725bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
21735bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2174273d9f13SBarry Smith   PetscFunctionReturn(0);
2175273d9f13SBarry Smith }
2176273d9f13SBarry Smith EXTERN_C_END
2177273d9f13SBarry Smith 
2178209238afSKris Buschelman /*MC
2179002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2180209238afSKris Buschelman 
2181209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2182209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2183209238afSKris Buschelman 
2184209238afSKris Buschelman    Options Database Keys:
2185209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2186209238afSKris Buschelman 
2187209238afSKris Buschelman   Level: beginner
2188209238afSKris Buschelman 
2189209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2190209238afSKris Buschelman M*/
2191209238afSKris Buschelman 
2192209238afSKris Buschelman EXTERN_C_BEGIN
2193209238afSKris Buschelman #undef __FUNCT__
2194209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2195209238afSKris Buschelman int MatCreate_BAIJ(Mat A) {
2196209238afSKris Buschelman   int ierr,size;
2197209238afSKris Buschelman 
2198209238afSKris Buschelman   PetscFunctionBegin;
2199209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2200209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2201209238afSKris Buschelman   if (size == 1) {
2202209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2203209238afSKris Buschelman   } else {
2204209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2205209238afSKris Buschelman   }
2206209238afSKris Buschelman   PetscFunctionReturn(0);
2207209238afSKris Buschelman }
2208209238afSKris Buschelman EXTERN_C_END
2209209238afSKris Buschelman 
22104a2ae208SSatish Balay #undef __FUNCT__
22114a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2212273d9f13SBarry Smith /*@C
2213273d9f13SBarry Smith    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2214273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2215273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2216273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2217273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2218273d9f13SBarry Smith 
2219273d9f13SBarry Smith    Collective on Mat
2220273d9f13SBarry Smith 
2221273d9f13SBarry Smith    Input Parameters:
2222273d9f13SBarry Smith +  A - the matrix
2223273d9f13SBarry Smith .  bs   - size of blockk
2224273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2225273d9f13SBarry Smith            submatrix  (same for all local rows)
2226273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2227273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2228273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2229273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2230273d9f13SBarry Smith            submatrix (same for all local rows).
2231273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2232273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2233273d9f13SBarry Smith            each block row) or PETSC_NULL.
2234273d9f13SBarry Smith 
2235273d9f13SBarry Smith    Output Parameter:
2236273d9f13SBarry Smith 
2237273d9f13SBarry Smith 
2238273d9f13SBarry Smith    Options Database Keys:
2239273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2240273d9f13SBarry Smith                      block calculations (much slower)
2241273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2242273d9f13SBarry Smith 
2243273d9f13SBarry Smith    Notes:
2244273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2245273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2246273d9f13SBarry Smith 
2247273d9f13SBarry Smith    Storage Information:
2248273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2249273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2250273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2251273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2252273d9f13SBarry Smith 
2253273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2254273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2255273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2256273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2257273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2258273d9f13SBarry Smith 
2259273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2260273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2261273d9f13SBarry Smith 
2262273d9f13SBarry Smith .vb
2263273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2264273d9f13SBarry Smith           -------------------
2265273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2266273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2267273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2268273d9f13SBarry Smith           -------------------
2269273d9f13SBarry Smith .ve
2270273d9f13SBarry Smith 
2271273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2272273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2273273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2274273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2275273d9f13SBarry Smith 
2276273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2277273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2278273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2279273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2280273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2281273d9f13SBarry Smith    matrices.
2282273d9f13SBarry Smith 
2283273d9f13SBarry Smith    Level: intermediate
2284273d9f13SBarry Smith 
2285273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2286273d9f13SBarry Smith 
2287273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2288273d9f13SBarry Smith @*/
2289ca01db9bSBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2290273d9f13SBarry Smith {
2291ca01db9bSBarry Smith   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2292273d9f13SBarry Smith 
2293273d9f13SBarry Smith   PetscFunctionBegin;
2294a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2295a23d5eceSKris Buschelman   if (f) {
2296a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2297273d9f13SBarry Smith   }
2298273d9f13SBarry Smith   PetscFunctionReturn(0);
2299273d9f13SBarry Smith }
2300273d9f13SBarry Smith 
23014a2ae208SSatish Balay #undef __FUNCT__
23024a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
230379bdfe76SSatish Balay /*@C
230479bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
230579bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
230679bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
230779bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
230879bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
230979bdfe76SSatish Balay 
2310db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2311db81eaa0SLois Curfman McInnes 
231279bdfe76SSatish Balay    Input Parameters:
2313db81eaa0SLois Curfman McInnes +  comm - MPI communicator
231479bdfe76SSatish Balay .  bs   - size of blockk
231579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
231692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
231792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
231892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
231992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
232092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2321be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2322be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
232347a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
232479bdfe76SSatish Balay            submatrix  (same for all local rows)
232547a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
232692e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2327db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
232847a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
232979bdfe76SSatish Balay            submatrix (same for all local rows).
233047a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
233192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
233292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
233379bdfe76SSatish Balay 
233479bdfe76SSatish Balay    Output Parameter:
233579bdfe76SSatish Balay .  A - the matrix
233679bdfe76SSatish Balay 
2337db81eaa0SLois Curfman McInnes    Options Database Keys:
2338db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2339db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2340db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
23413ffaccefSLois Curfman McInnes 
2342b259b22eSLois Curfman McInnes    Notes:
234347a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
234447a75d0bSBarry Smith 
234579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
234679bdfe76SSatish Balay    (possibly both).
234779bdfe76SSatish Balay 
2348be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2349be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2350be79a94dSBarry Smith 
235179bdfe76SSatish Balay    Storage Information:
235279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
235379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
235479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
235579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
235679bdfe76SSatish Balay 
235779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
235879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
235979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
236079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
236179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
236279bdfe76SSatish Balay 
236379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
236479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
236579bdfe76SSatish Balay 
2366db81eaa0SLois Curfman McInnes .vb
2367db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2368db81eaa0SLois Curfman McInnes           -------------------
2369db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2370db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2371db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2372db81eaa0SLois Curfman McInnes           -------------------
2373db81eaa0SLois Curfman McInnes .ve
237479bdfe76SSatish Balay 
237579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
237679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
237779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
237857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
237979bdfe76SSatish Balay 
2380d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2381d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
238279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
238392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
238492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
23856da5968aSLois Curfman McInnes    matrices.
238679bdfe76SSatish Balay 
2387027ccd11SLois Curfman McInnes    Level: intermediate
2388027ccd11SLois Curfman McInnes 
238992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
239079bdfe76SSatish Balay 
23913eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
239279bdfe76SSatish Balay @*/
2393ca01db9bSBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
239479bdfe76SSatish Balay {
2395273d9f13SBarry Smith   int ierr,size;
239679bdfe76SSatish Balay 
2397d64ed03dSBarry Smith   PetscFunctionBegin;
2398273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2399d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2400273d9f13SBarry Smith   if (size > 1) {
2401273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2402273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2403273d9f13SBarry Smith   } else {
2404273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2405273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
24063914022bSBarry Smith   }
24073a40ed3dSBarry Smith   PetscFunctionReturn(0);
240879bdfe76SSatish Balay }
2409026e39d0SSatish Balay 
24104a2ae208SSatish Balay #undef __FUNCT__
24114a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
24122e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
24130ac07820SSatish Balay {
24140ac07820SSatish Balay   Mat         mat;
24150ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2416f1af5d2fSBarry Smith   int         ierr,len=0;
24170ac07820SSatish Balay 
2418d64ed03dSBarry Smith   PetscFunctionBegin;
24190ac07820SSatish Balay   *newmat       = 0;
2420273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2421be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
24227fff6886SHong Zhang 
24234beb1cfeSHong Zhang   ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
24244beb1cfeSHong Zhang   mat->factor       = matin->factor;
2425273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24260ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
24277fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24287fff6886SHong Zhang 
2429273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
24300ac07820SSatish Balay   a->bs  = oldmat->bs;
24310ac07820SSatish Balay   a->bs2 = oldmat->bs2;
24320ac07820SSatish Balay   a->mbs = oldmat->mbs;
24330ac07820SSatish Balay   a->nbs = oldmat->nbs;
24340ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
24350ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
24360ac07820SSatish Balay 
24370ac07820SSatish Balay   a->rstart       = oldmat->rstart;
24380ac07820SSatish Balay   a->rend         = oldmat->rend;
24390ac07820SSatish Balay   a->cstart       = oldmat->cstart;
24400ac07820SSatish Balay   a->cend         = oldmat->cend;
24410ac07820SSatish Balay   a->size         = oldmat->size;
24420ac07820SSatish Balay   a->rank         = oldmat->rank;
2443aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2444aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2445aef5e8e0SSatish Balay   a->rowindices   = 0;
24460ac07820SSatish Balay   a->rowvalues    = 0;
24470ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
244830793edcSSatish Balay   a->barray       = 0;
24493123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
24503123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
24513123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
24523123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
24530ac07820SSatish Balay 
2454133cdb44SSatish Balay   /* hash table stuff */
2455133cdb44SSatish Balay   a->ht           = 0;
2456133cdb44SSatish Balay   a->hd           = 0;
2457133cdb44SSatish Balay   a->ht_size      = 0;
2458133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
245925fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2460133cdb44SSatish Balay   a->ht_total_ct  = 0;
2461133cdb44SSatish Balay   a->ht_insert_ct = 0;
2462133cdb44SSatish Balay 
2463549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
24648798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
24658798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
24660ac07820SSatish Balay   if (oldmat->colmap) {
2467aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
24680f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
246948e59246SSatish Balay #else
247082502324SSatish Balay   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2471b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2472549d3d68SSatish Balay   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
247348e59246SSatish Balay #endif
24740ac07820SSatish Balay   } else a->colmap = 0;
24754beb1cfeSHong Zhang 
24760ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
247782502324SSatish Balay     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2478b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,len*sizeof(int));
2479549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
24800ac07820SSatish Balay   } else a->garray = 0;
24810ac07820SSatish Balay 
24820ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2483b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->lvec);
24840ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2485b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->Mvctx);
24867fff6886SHong Zhang 
24872e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2488b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->A);
24892e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2490b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->B);
2491b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
24920ac07820SSatish Balay   *newmat = mat;
24934beb1cfeSHong Zhang 
24943a40ed3dSBarry Smith   PetscFunctionReturn(0);
24950ac07820SSatish Balay }
249657b952d6SSatish Balay 
2497e090d566SSatish Balay #include "petscsys.h"
249857b952d6SSatish Balay 
24994a2ae208SSatish Balay #undef __FUNCT__
25004a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
25018e9aea5cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
250257b952d6SSatish Balay {
250357b952d6SSatish Balay   Mat          A;
250457b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
250587828ca2SBarry Smith   PetscScalar  *vals,*buf;
250657b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
250757b952d6SSatish Balay   MPI_Status   status;
2508cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
250957b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2510f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
251157b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
251257b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
251357b952d6SSatish Balay 
2514d64ed03dSBarry Smith   PetscFunctionBegin;
2515b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
251657b952d6SSatish Balay 
2517d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2518d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251957b952d6SSatish Balay   if (!rank) {
2520b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2521e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2522552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2523d64ed03dSBarry Smith     if (header[3] < 0) {
252429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2525d64ed03dSBarry Smith     }
25266c5fab8fSBarry Smith   }
2527d64ed03dSBarry Smith 
2528ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
252957b952d6SSatish Balay   M = header[1]; N = header[2];
253057b952d6SSatish Balay 
253129bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
253257b952d6SSatish Balay 
253357b952d6SSatish Balay   /*
253457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
253557b952d6SSatish Balay      divisible by the blocksize
253657b952d6SSatish Balay   */
253757b952d6SSatish Balay   Mbs        = M/bs;
253857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
253957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
254057b952d6SSatish Balay   else                  Mbs++;
254157b952d6SSatish Balay   if (extra_rows &&!rank) {
2542b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
254357b952d6SSatish Balay   }
2544537820f0SBarry Smith 
254557b952d6SSatish Balay   /* determine ownership of all rows */
254657b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
254757b952d6SSatish Balay   m          = mbs*bs;
2548b0a32e0cSBarry Smith   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2549cee3aa6bSSatish Balay   browners   = rowners + size + 1;
2550ca161407SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
255157b952d6SSatish Balay   rowners[0] = 0;
2552cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2553cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
255457b952d6SSatish Balay   rstart = rowners[rank];
255557b952d6SSatish Balay   rend   = rowners[rank+1];
255657b952d6SSatish Balay 
255757b952d6SSatish Balay   /* distribute row lengths to all processors */
255882502324SSatish Balay   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
255957b952d6SSatish Balay   if (!rank) {
2560b0a32e0cSBarry Smith     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2561e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
256257b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
256382502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2564cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2565ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2566606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2567d64ed03dSBarry Smith   } else {
2568ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
256957b952d6SSatish Balay   }
257057b952d6SSatish Balay 
257157b952d6SSatish Balay   if (!rank) {
257257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
257382502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2574549d3d68SSatish Balay     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
257557b952d6SSatish Balay     for (i=0; i<size; i++) {
257657b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
257757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
257857b952d6SSatish Balay       }
257957b952d6SSatish Balay     }
2580606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
258157b952d6SSatish Balay 
258257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
258357b952d6SSatish Balay     maxnz = 0;
258457b952d6SSatish Balay     for (i=0; i<size; i++) {
258557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
258657b952d6SSatish Balay     }
258782502324SSatish Balay     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
258857b952d6SSatish Balay 
258957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
259057b952d6SSatish Balay     nz     = procsnz[0];
259182502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
259257b952d6SSatish Balay     mycols = ibuf;
2593cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2594e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2595cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2596cee3aa6bSSatish Balay 
259757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
259857b952d6SSatish Balay     for (i=1; i<size-1; i++) {
259957b952d6SSatish Balay       nz   = procsnz[i];
2600e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2601ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
260257b952d6SSatish Balay     }
260357b952d6SSatish Balay     /* read in the stuff for the last proc */
260457b952d6SSatish Balay     if (size != 1) {
260557b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2606e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
260757b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2608ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
260957b952d6SSatish Balay     }
2610606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2611d64ed03dSBarry Smith   } else {
261257b952d6SSatish Balay     /* determine buffer space needed for message */
261357b952d6SSatish Balay     nz = 0;
261457b952d6SSatish Balay     for (i=0; i<m; i++) {
261557b952d6SSatish Balay       nz += locrowlens[i];
261657b952d6SSatish Balay     }
261782502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
261857b952d6SSatish Balay     mycols = ibuf;
261957b952d6SSatish Balay     /* receive message of column indices*/
2620ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2621ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
262229bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
262357b952d6SSatish Balay   }
262457b952d6SSatish Balay 
262557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
262682502324SSatish Balay   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2627cee3aa6bSSatish Balay   odlens   = dlens + (rend-rstart);
262882502324SSatish Balay   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2629549d3d68SSatish Balay   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
263057b952d6SSatish Balay   masked1  = mask    + Mbs;
263157b952d6SSatish Balay   masked2  = masked1 + Mbs;
263257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
263357b952d6SSatish Balay   for (i=0; i<mbs; i++) {
263457b952d6SSatish Balay     dcount  = 0;
263557b952d6SSatish Balay     odcount = 0;
263657b952d6SSatish Balay     for (j=0; j<bs; j++) {
263757b952d6SSatish Balay       kmax = locrowlens[rowcount];
263857b952d6SSatish Balay       for (k=0; k<kmax; k++) {
263957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
264057b952d6SSatish Balay         if (!mask[tmp]) {
264157b952d6SSatish Balay           mask[tmp] = 1;
264257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
264357b952d6SSatish Balay           else masked1[dcount++] = tmp;
264457b952d6SSatish Balay         }
264557b952d6SSatish Balay       }
264657b952d6SSatish Balay       rowcount++;
264757b952d6SSatish Balay     }
2648cee3aa6bSSatish Balay 
264957b952d6SSatish Balay     dlens[i]  = dcount;
265057b952d6SSatish Balay     odlens[i] = odcount;
2651cee3aa6bSSatish Balay 
265257b952d6SSatish Balay     /* zero out the mask elements we set */
265357b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
265457b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
265557b952d6SSatish Balay   }
2656cee3aa6bSSatish Balay 
265757b952d6SSatish Balay   /* create our matrix */
265878ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
265978ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
266078ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
266178ae41b4SKris Buschelman 
266278ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
26636d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
266457b952d6SSatish Balay 
266557b952d6SSatish Balay   if (!rank) {
266687828ca2SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
266757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
266857b952d6SSatish Balay     nz = procsnz[0];
266957b952d6SSatish Balay     vals = buf;
2670cee3aa6bSSatish Balay     mycols = ibuf;
2671cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2672e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2673cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2674537820f0SBarry Smith 
267557b952d6SSatish Balay     /* insert into matrix */
267657b952d6SSatish Balay     jj      = rstart*bs;
267757b952d6SSatish Balay     for (i=0; i<m; i++) {
2678b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
267957b952d6SSatish Balay       mycols += locrowlens[i];
268057b952d6SSatish Balay       vals   += locrowlens[i];
268157b952d6SSatish Balay       jj++;
268257b952d6SSatish Balay     }
268357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
268457b952d6SSatish Balay     for (i=1; i<size-1; i++) {
268557b952d6SSatish Balay       nz   = procsnz[i];
268657b952d6SSatish Balay       vals = buf;
2687e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2688ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
268957b952d6SSatish Balay     }
269057b952d6SSatish Balay     /* the last proc */
269157b952d6SSatish Balay     if (size != 1){
269257b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2693cee3aa6bSSatish Balay       vals = buf;
2694e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
269557b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2696ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
269757b952d6SSatish Balay     }
2698606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2699d64ed03dSBarry Smith   } else {
270057b952d6SSatish Balay     /* receive numeric values */
270187828ca2SBarry Smith     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
270257b952d6SSatish Balay 
270357b952d6SSatish Balay     /* receive message of values*/
270457b952d6SSatish Balay     vals   = buf;
2705cee3aa6bSSatish Balay     mycols = ibuf;
2706ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2707ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
270829bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
270957b952d6SSatish Balay 
271057b952d6SSatish Balay     /* insert into matrix */
271157b952d6SSatish Balay     jj      = rstart*bs;
2712cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2713b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
271457b952d6SSatish Balay       mycols += locrowlens[i];
271557b952d6SSatish Balay       vals   += locrowlens[i];
271657b952d6SSatish Balay       jj++;
271757b952d6SSatish Balay     }
271857b952d6SSatish Balay   }
2719606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2720606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2721606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2722606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2723606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2724606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
27256d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27266d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272778ae41b4SKris Buschelman 
272878ae41b4SKris Buschelman   *newmat = A;
27293a40ed3dSBarry Smith   PetscFunctionReturn(0);
273057b952d6SSatish Balay }
273157b952d6SSatish Balay 
27324a2ae208SSatish Balay #undef __FUNCT__
27334a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2734133cdb44SSatish Balay /*@
2735133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2736133cdb44SSatish Balay 
2737133cdb44SSatish Balay    Input Parameters:
2738133cdb44SSatish Balay .  mat  - the matrix
2739133cdb44SSatish Balay .  fact - factor
2740133cdb44SSatish Balay 
2741fee21e36SBarry Smith    Collective on Mat
2742fee21e36SBarry Smith 
27438c890885SBarry Smith    Level: advanced
27448c890885SBarry Smith 
2745133cdb44SSatish Balay   Notes:
2746133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2747133cdb44SSatish Balay 
2748133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2749133cdb44SSatish Balay 
2750133cdb44SSatish Balay .seealso: MatSetOption()
2751133cdb44SSatish Balay @*/
2752329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2753133cdb44SSatish Balay {
27545bf65638SKris Buschelman   int ierr,(*f)(Mat,PetscReal);
27555bf65638SKris Buschelman 
27565bf65638SKris Buschelman   PetscFunctionBegin;
27575bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
27585bf65638SKris Buschelman   if (f) {
27595bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
27605bf65638SKris Buschelman   }
27615bf65638SKris Buschelman   PetscFunctionReturn(0);
27625bf65638SKris Buschelman }
27635bf65638SKris Buschelman 
27645bf65638SKris Buschelman #undef __FUNCT__
27655bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
27665bf65638SKris Buschelman int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
27675bf65638SKris Buschelman {
276825fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2769133cdb44SSatish Balay 
2770133cdb44SSatish Balay   PetscFunctionBegin;
2771133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2772133cdb44SSatish Balay   baij->ht_fact = fact;
2773133cdb44SSatish Balay   PetscFunctionReturn(0);
2774133cdb44SSatish Balay }
2775f2a5309cSSatish Balay 
27764a2ae208SSatish Balay #undef __FUNCT__
27774a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2778268466fbSBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2779f2a5309cSSatish Balay {
2780f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2781f2a5309cSSatish Balay   PetscFunctionBegin;
2782f2a5309cSSatish Balay   *Ad     = a->A;
2783f2a5309cSSatish Balay   *Ao     = a->B;
2784195d93cdSBarry Smith   *colmap = a->garray;
2785f2a5309cSSatish Balay   PetscFunctionReturn(0);
2786f2a5309cSSatish Balay }
2787