xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 1d5dac46e5bf6fbea62f3101f11553ee697c545e)
179bdfe76SSatish Balay 
2e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
379bdfe76SSatish Balay 
4ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
5ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat);
6268466fbSBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7268466fbSBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8f15d580aSBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9f15d580aSBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10f15d580aSBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11268466fbSBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12268466fbSBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat);
14268466fbSBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1593fea6afSBarry Smith 
1693fea6afSBarry Smith /*  UGLY, ugly, ugly
1787828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1893fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
1993fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2093fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2193fea6afSBarry Smith    into the single precision data structures.
2293fea6afSBarry Smith */
2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2434232ad1SSatish Balay EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2534232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2634232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2734232ad1SSatish Balay EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2834232ad1SSatish Balay EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
2993fea6afSBarry Smith #else
306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3593fea6afSBarry Smith #endif
363b2fbd54SBarry Smith 
374a2ae208SSatish Balay #undef __FUNCT__
384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
397843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
407843d17aSBarry Smith {
417843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
427843d17aSBarry Smith   int          ierr,i;
4387828ca2SBarry Smith   PetscScalar  *va,*vb;
447843d17aSBarry Smith   Vec          vtmp;
457843d17aSBarry Smith 
467843d17aSBarry Smith   PetscFunctionBegin;
477843d17aSBarry Smith 
487843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
491ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
507843d17aSBarry Smith 
51ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
527843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
531ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
547843d17aSBarry Smith 
55273d9f13SBarry Smith   for (i=0; i<A->m; i++){
56273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
577843d17aSBarry Smith   }
587843d17aSBarry Smith 
591ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
601ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
61ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
627843d17aSBarry Smith 
637843d17aSBarry Smith   PetscFunctionReturn(0);
647843d17aSBarry Smith }
657843d17aSBarry Smith 
667fc3c18eSBarry Smith EXTERN_C_BEGIN
674a2ae208SSatish Balay #undef __FUNCT__
684a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
697fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
707fc3c18eSBarry Smith {
717fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
727fc3c18eSBarry Smith   int         ierr;
737fc3c18eSBarry Smith 
747fc3c18eSBarry Smith   PetscFunctionBegin;
757fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
767fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
777fc3c18eSBarry Smith   PetscFunctionReturn(0);
787fc3c18eSBarry Smith }
797fc3c18eSBarry Smith EXTERN_C_END
807fc3c18eSBarry Smith 
817fc3c18eSBarry Smith EXTERN_C_BEGIN
824a2ae208SSatish Balay #undef __FUNCT__
834a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
847fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
857fc3c18eSBarry Smith {
867fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
877fc3c18eSBarry Smith   int         ierr;
887fc3c18eSBarry Smith 
897fc3c18eSBarry Smith   PetscFunctionBegin;
907fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
917fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
927fc3c18eSBarry Smith   PetscFunctionReturn(0);
937fc3c18eSBarry Smith }
947fc3c18eSBarry Smith EXTERN_C_END
957fc3c18eSBarry Smith 
96537820f0SBarry Smith /*
97537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
9857b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
9957b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10057b952d6SSatish Balay    length of colmap equals the global matrix length.
10157b952d6SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
104653e4784SBarry Smith int CreateColmap_MPIBAIJ_Private(Mat mat)
10557b952d6SSatish Balay {
10657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10757b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
108dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
10957b952d6SSatish Balay 
110d64ed03dSBarry Smith   PetscFunctionBegin;
111aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
112f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11348e59246SSatish Balay   for (i=0; i<nbs; i++){
1140f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11548e59246SSatish Balay   }
11648e59246SSatish Balay #else
11782502324SSatish Balay   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
118b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
119549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
120928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12148e59246SSatish Balay #endif
1223a40ed3dSBarry Smith   PetscFunctionReturn(0);
12357b952d6SSatish Balay }
12457b952d6SSatish Balay 
12580c1aa95SSatish Balay #define CHUNKSIZE  10
12680c1aa95SSatish Balay 
127f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
12880c1aa95SSatish Balay { \
12980c1aa95SSatish Balay  \
13080c1aa95SSatish Balay     brow = row/bs;  \
13180c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
132ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13380c1aa95SSatish Balay       bcol = col/bs; \
13480c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
135ab26458aSBarry Smith       low = 0; high = nrow; \
136ab26458aSBarry Smith       while (high-low > 3) { \
137ab26458aSBarry Smith         t = (low+high)/2; \
138ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
139ab26458aSBarry Smith         else              low  = t; \
140ab26458aSBarry Smith       } \
141ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14280c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14380c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14480c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
145eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
146eada6651SSatish Balay           else                    *bap  = value;  \
147ac7a638eSSatish Balay           goto a_noinsert; \
14880c1aa95SSatish Balay         } \
14980c1aa95SSatish Balay       } \
15089280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
151a45adfd6SMatthew Knepley       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
15280c1aa95SSatish Balay       if (nrow >= rmax) { \
15380c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15480c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1553eda8832SBarry Smith         MatScalar *new_a; \
15680c1aa95SSatish Balay  \
157a45adfd6SMatthew Knepley         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
15889280ab3SLois Curfman McInnes  \
15980c1aa95SSatish Balay         /* malloc new storage space */ \
1603eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
16182502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
16280c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16380c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16480c1aa95SSatish Balay  \
16580c1aa95SSatish Balay         /* copy over old data into new slots */ \
16680c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16780c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
168549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
16980c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1703eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1713eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17287828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
173549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1743eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17580c1aa95SSatish Balay         /* free up old matrix storage */ \
176606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
177606d414cSSatish Balay         if (!a->singlemalloc) { \
178606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
179606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
180606d414cSSatish Balay         } \
18180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1827c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18380c1aa95SSatish Balay  \
18480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
185ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
186b0a32e0cSBarry Smith         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
18880c1aa95SSatish Balay         a->reallocs++; \
18980c1aa95SSatish Balay         a->nz++; \
19080c1aa95SSatish Balay       } \
19180c1aa95SSatish Balay       N = nrow++ - 1;  \
19280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19380c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1953eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19680c1aa95SSatish Balay       } \
1973eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
19880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
19980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
200ac7a638eSSatish Balay       a_noinsert:; \
20180c1aa95SSatish Balay     ailen[brow] = nrow; \
20280c1aa95SSatish Balay }
20357b952d6SSatish Balay 
204ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
205ac7a638eSSatish Balay { \
206ac7a638eSSatish Balay     brow = row/bs;  \
207ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
208ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
209ac7a638eSSatish Balay       bcol = col/bs; \
210ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
211ac7a638eSSatish Balay       low = 0; high = nrow; \
212ac7a638eSSatish Balay       while (high-low > 3) { \
213ac7a638eSSatish Balay         t = (low+high)/2; \
214ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
215ac7a638eSSatish Balay         else              low  = t; \
216ac7a638eSSatish Balay       } \
217ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
218ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
219ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
220ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
221ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
222ac7a638eSSatish Balay           else                    *bap  = value;  \
223ac7a638eSSatish Balay           goto b_noinsert; \
224ac7a638eSSatish Balay         } \
225ac7a638eSSatish Balay       } \
22689280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
227a45adfd6SMatthew Knepley       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
228ac7a638eSSatish Balay       if (nrow >= rmax) { \
229ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23074c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2313eda8832SBarry Smith         MatScalar *new_a; \
232ac7a638eSSatish Balay  \
233a45adfd6SMatthew Knepley         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
23489280ab3SLois Curfman McInnes  \
235ac7a638eSSatish Balay         /* malloc new storage space */ \
2363eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
23782502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
238ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
239ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
240ac7a638eSSatish Balay  \
241ac7a638eSSatish Balay         /* copy over old data into new slots */ \
242ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24374c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
244549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
245ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2463eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2473eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2483eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
249549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2503eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
251ac7a638eSSatish Balay         /* free up old matrix storage */ \
252606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
253606d414cSSatish Balay         if (!b->singlemalloc) { \
254606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
255606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
256606d414cSSatish Balay         } \
25774c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2587c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
259ac7a638eSSatish Balay  \
260ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
261ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
262b0a32e0cSBarry Smith         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26374c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26474c639caSSatish Balay         b->reallocs++; \
26574c639caSSatish Balay         b->nz++; \
266ac7a638eSSatish Balay       } \
267ac7a638eSSatish Balay       N = nrow++ - 1;  \
268ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
269ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
270ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2713eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
272ac7a638eSSatish Balay       } \
2733eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
274ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
275ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
276ac7a638eSSatish Balay       b_noinsert:; \
277ac7a638eSSatish Balay     bilen[brow] = nrow; \
278ac7a638eSSatish Balay }
279ac7a638eSSatish Balay 
28093fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2814a2ae208SSatish Balay #undef __FUNCT__
2824a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
283f15d580aSBarry Smith int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
28457b952d6SSatish Balay {
2856fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
28693fea6afSBarry Smith   int         ierr,i,N = m*n;
2876fa18ffdSBarry Smith   MatScalar   *vsingle;
28893fea6afSBarry Smith 
28993fea6afSBarry Smith   PetscFunctionBegin;
2906fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2916fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29282502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2936fa18ffdSBarry Smith     b->setvalueslen  = N;
2946fa18ffdSBarry Smith   }
2956fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2966fa18ffdSBarry Smith 
29793fea6afSBarry Smith   for (i=0; i<N; i++) {
29893fea6afSBarry Smith     vsingle[i] = v[i];
29993fea6afSBarry Smith   }
30093fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30193fea6afSBarry Smith   PetscFunctionReturn(0);
30293fea6afSBarry Smith }
30393fea6afSBarry Smith 
3044a2ae208SSatish Balay #undef __FUNCT__
3054a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
306f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
30793fea6afSBarry Smith {
3086fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3096fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3106fa18ffdSBarry Smith   MatScalar   *vsingle;
31193fea6afSBarry Smith 
31293fea6afSBarry Smith   PetscFunctionBegin;
3136fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3146fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
31582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3166fa18ffdSBarry Smith     b->setvalueslen  = N;
3176fa18ffdSBarry Smith   }
3186fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
31993fea6afSBarry Smith   for (i=0; i<N; i++) {
32093fea6afSBarry Smith     vsingle[i] = v[i];
32193fea6afSBarry Smith   }
32293fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32393fea6afSBarry Smith   PetscFunctionReturn(0);
32493fea6afSBarry Smith }
3256fa18ffdSBarry Smith 
3264a2ae208SSatish Balay #undef __FUNCT__
3274a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
328f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3296fa18ffdSBarry Smith {
3306fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3316fa18ffdSBarry Smith   int         ierr,i,N = m*n;
3326fa18ffdSBarry Smith   MatScalar   *vsingle;
3336fa18ffdSBarry Smith 
3346fa18ffdSBarry Smith   PetscFunctionBegin;
3356fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3366fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
33782502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3386fa18ffdSBarry Smith     b->setvalueslen  = N;
3396fa18ffdSBarry Smith   }
3406fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3416fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3426fa18ffdSBarry Smith     vsingle[i] = v[i];
3436fa18ffdSBarry Smith   }
3446fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3456fa18ffdSBarry Smith   PetscFunctionReturn(0);
3466fa18ffdSBarry Smith }
3476fa18ffdSBarry Smith 
3484a2ae208SSatish Balay #undef __FUNCT__
3494a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
350f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
3516fa18ffdSBarry Smith {
3526fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3536fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3546fa18ffdSBarry Smith   MatScalar   *vsingle;
3556fa18ffdSBarry Smith 
3566fa18ffdSBarry Smith   PetscFunctionBegin;
3576fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3586fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
35982502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3606fa18ffdSBarry Smith     b->setvalueslen  = N;
3616fa18ffdSBarry Smith   }
3626fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3636fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3646fa18ffdSBarry Smith     vsingle[i] = v[i];
3656fa18ffdSBarry Smith   }
3666fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3676fa18ffdSBarry Smith   PetscFunctionReturn(0);
3686fa18ffdSBarry Smith }
36993fea6afSBarry Smith #endif
37093fea6afSBarry Smith 
3714a2ae208SSatish Balay #undef __FUNCT__
372e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
373f15d580aSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
37493fea6afSBarry Smith {
37557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
37693fea6afSBarry Smith   MatScalar   value;
377273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
3784fa0d573SSatish Balay   int         ierr,i,j,row,col;
379273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
3804fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3814fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38257b952d6SSatish Balay 
383eada6651SSatish Balay   /* Some Variables required in the macro */
38480c1aa95SSatish Balay   Mat         A = baij->A;
38580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
386ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3873eda8832SBarry Smith   MatScalar   *aa=a->a;
388ac7a638eSSatish Balay 
389ac7a638eSSatish Balay   Mat         B = baij->B;
390ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
391ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3923eda8832SBarry Smith   MatScalar   *ba=b->a;
393ac7a638eSSatish Balay 
394ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
395ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3963eda8832SBarry Smith   MatScalar   *ap,*bap;
39780c1aa95SSatish Balay 
398d64ed03dSBarry Smith   PetscFunctionBegin;
39957b952d6SSatish Balay   for (i=0; i<m; i++) {
4005ef9f2a5SBarry Smith     if (im[i] < 0) continue;
401aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
402590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
403639f9d9dSBarry Smith #endif
40457b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
40557b952d6SSatish Balay       row = im[i] - rstart_orig;
40657b952d6SSatish Balay       for (j=0; j<n; j++) {
40757b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
40857b952d6SSatish Balay           col = in[j] - cstart_orig;
40957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41180c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41273959e64SBarry Smith         } else if (in[j] < 0) continue;
413aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
414590ac198SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
415639f9d9dSBarry Smith #endif
41657b952d6SSatish Balay         else {
41757b952d6SSatish Balay           if (mat->was_assembled) {
418905e6a2fSBarry Smith             if (!baij->colmap) {
419905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
420905e6a2fSBarry Smith             }
421aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4220f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
423bba1ac68SSatish Balay             col  = col - 1;
42448e59246SSatish Balay #else
425bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
42648e59246SSatish Balay #endif
42757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
42857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4298295de27SSatish Balay               col =  in[j];
4309bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4319bf004c3SSatish Balay               B = baij->B;
4329bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4339bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4349bf004c3SSatish Balay               ba=b->a;
435bba1ac68SSatish Balay             } else col += in[j]%bs;
4368295de27SSatish Balay           } else col = in[j];
43757b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
43890da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
43990da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44057b952d6SSatish Balay         }
44157b952d6SSatish Balay       }
442d64ed03dSBarry Smith     } else {
44390f02eecSBarry Smith       if (!baij->donotstash) {
444ff2fd236SBarry Smith         if (roworiented) {
4456fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
446ff2fd236SBarry Smith         } else {
4476fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
44857b952d6SSatish Balay         }
44957b952d6SSatish Balay       }
45057b952d6SSatish Balay     }
45190f02eecSBarry Smith   }
4523a40ed3dSBarry Smith   PetscFunctionReturn(0);
45357b952d6SSatish Balay }
45457b952d6SSatish Balay 
4554a2ae208SSatish Balay #undef __FUNCT__
4564a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
457f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
458ab26458aSBarry Smith {
459ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
460f15d580aSBarry Smith   const MatScalar *value;
461f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
462273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
463273d9f13SBarry Smith   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
464ab26458aSBarry Smith   int             rend=baij->rend,cstart=baij->cstart,stepval;
465ab26458aSBarry Smith   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
466ab26458aSBarry Smith 
467b16ae2b1SBarry Smith   PetscFunctionBegin;
46830793edcSSatish Balay   if(!barray) {
46982502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47082502324SSatish Balay     baij->barray = barray;
47130793edcSSatish Balay   }
47230793edcSSatish Balay 
473ab26458aSBarry Smith   if (roworiented) {
474ab26458aSBarry Smith     stepval = (n-1)*bs;
475ab26458aSBarry Smith   } else {
476ab26458aSBarry Smith     stepval = (m-1)*bs;
477ab26458aSBarry Smith   }
478ab26458aSBarry Smith   for (i=0; i<m; i++) {
4795ef9f2a5SBarry Smith     if (im[i] < 0) continue;
480aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
481590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
482ab26458aSBarry Smith #endif
483ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
484ab26458aSBarry Smith       row = im[i] - rstart;
485ab26458aSBarry Smith       for (j=0; j<n; j++) {
48615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
48715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
488f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
48915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
490f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
49115b57d14SSatish Balay         } else { /* Here a copy is required */
492ab26458aSBarry Smith           if (roworiented) {
493ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
494ab26458aSBarry Smith           } else {
495ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
496abef11f7SSatish Balay           }
49747513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
49847513183SBarry Smith             for (jj=0; jj<bs; jj++) {
49930793edcSSatish Balay               *barray++  = *value++;
50047513183SBarry Smith             }
50147513183SBarry Smith           }
50230793edcSSatish Balay           barray -=bs2;
50315b57d14SSatish Balay         }
504abef11f7SSatish Balay 
505abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
506abef11f7SSatish Balay           col  = in[j] - cstart;
50793fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
508ab26458aSBarry Smith         }
5095ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
510aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
511590ac198SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
512ab26458aSBarry Smith #endif
513ab26458aSBarry Smith         else {
514ab26458aSBarry Smith           if (mat->was_assembled) {
515ab26458aSBarry Smith             if (!baij->colmap) {
516ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
517ab26458aSBarry Smith             }
518a5eb4965SSatish Balay 
519aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
520aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
521fa46199cSSatish Balay             { int data;
5220f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
52329bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
524fa46199cSSatish Balay             }
52548e59246SSatish Balay #else
52629bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
527a5eb4965SSatish Balay #endif
52848e59246SSatish Balay #endif
529aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5300f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
531fa46199cSSatish Balay             col  = (col - 1)/bs;
53248e59246SSatish Balay #else
533a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
53448e59246SSatish Balay #endif
535ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
536ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
537ab26458aSBarry Smith               col =  in[j];
538ab26458aSBarry Smith             }
539ab26458aSBarry Smith           }
540ab26458aSBarry Smith           else col = in[j];
54193fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
542ab26458aSBarry Smith         }
543ab26458aSBarry Smith       }
544d64ed03dSBarry Smith     } else {
545ab26458aSBarry Smith       if (!baij->donotstash) {
546ff2fd236SBarry Smith         if (roworiented) {
5476fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
548ff2fd236SBarry Smith         } else {
5496fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550ff2fd236SBarry Smith         }
551abef11f7SSatish Balay       }
552ab26458aSBarry Smith     }
553ab26458aSBarry Smith   }
5543a40ed3dSBarry Smith   PetscFunctionReturn(0);
555ab26458aSBarry Smith }
5566fa18ffdSBarry Smith 
5570bdbc534SSatish Balay #define HASH_KEY 0.6180339887
558c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5596fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
560c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5614a2ae208SSatish Balay #undef __FUNCT__
5624a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
563f15d580aSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
5640bdbc534SSatish Balay {
5650bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
566273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
5670bdbc534SSatish Balay   int         ierr,i,j,row,col;
568273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
569c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571329f5518SBarry Smith   PetscReal   tmp;
5723eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
573aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5744a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5754a15367fSSatish Balay #endif
5760bdbc534SSatish Balay 
5770bdbc534SSatish Balay   PetscFunctionBegin;
5780bdbc534SSatish Balay 
5790bdbc534SSatish Balay   for (i=0; i<m; i++) {
580aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
58129bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
582590ac198SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
5830bdbc534SSatish Balay #endif
5840bdbc534SSatish Balay       row = im[i];
585c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5860bdbc534SSatish Balay       for (j=0; j<n; j++) {
5870bdbc534SSatish Balay         col = in[j];
5886fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5890bdbc534SSatish Balay         /* Look up into the Hash Table */
590c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5920bdbc534SSatish Balay 
593c2760754SSatish Balay 
594c2760754SSatish Balay         idx = h1;
595aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
596187ce0cbSSatish Balay         insert_ct++;
597187ce0cbSSatish Balay         total_ct++;
598187ce0cbSSatish Balay         if (HT[idx] != key) {
599187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600187ce0cbSSatish Balay           if (idx == size) {
601187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602187ce0cbSSatish Balay             if (idx == h1) {
603a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
604187ce0cbSSatish Balay             }
605187ce0cbSSatish Balay           }
606187ce0cbSSatish Balay         }
607187ce0cbSSatish Balay #else
608c2760754SSatish Balay         if (HT[idx] != key) {
609c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610c2760754SSatish Balay           if (idx == size) {
611c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612c2760754SSatish Balay             if (idx == h1) {
613a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
614c2760754SSatish Balay             }
615c2760754SSatish Balay           }
616c2760754SSatish Balay         }
617187ce0cbSSatish Balay #endif
618c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6210bdbc534SSatish Balay       }
6220bdbc534SSatish Balay     } else {
6230bdbc534SSatish Balay       if (!baij->donotstash) {
624ff2fd236SBarry Smith         if (roworiented) {
6258798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626ff2fd236SBarry Smith         } else {
6278798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6280bdbc534SSatish Balay         }
6290bdbc534SSatish Balay       }
6300bdbc534SSatish Balay     }
6310bdbc534SSatish Balay   }
632aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
633187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
634187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
635187ce0cbSSatish Balay #endif
6360bdbc534SSatish Balay   PetscFunctionReturn(0);
6370bdbc534SSatish Balay }
6380bdbc534SSatish Balay 
6394a2ae208SSatish Balay #undef __FUNCT__
6404a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
641f15d580aSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
6420bdbc534SSatish Balay {
6430bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
644273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
6458798bf22SSatish Balay   int             ierr,i,j,ii,jj,row,col;
646273d9f13SBarry Smith   int             rstart=baij->rstart ;
647b4cc0f5aSSatish Balay   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
648c2760754SSatish Balay   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
649329f5518SBarry Smith   PetscReal       tmp;
6503eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
651f15d580aSBarry Smith   const MatScalar *v_t,*value;
652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6534a15367fSSatish Balay   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6544a15367fSSatish Balay #endif
6550bdbc534SSatish Balay 
656d0a41580SSatish Balay   PetscFunctionBegin;
657d0a41580SSatish Balay 
6580bdbc534SSatish Balay   if (roworiented) {
6590bdbc534SSatish Balay     stepval = (n-1)*bs;
6600bdbc534SSatish Balay   } else {
6610bdbc534SSatish Balay     stepval = (m-1)*bs;
6620bdbc534SSatish Balay   }
6630bdbc534SSatish Balay   for (i=0; i<m; i++) {
664aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
665590ac198SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
666590ac198SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
6670bdbc534SSatish Balay #endif
6680bdbc534SSatish Balay     row   = im[i];
669187ce0cbSSatish Balay     v_t   = v + i*bs2;
670c2760754SSatish Balay     if (row >= rstart && row < rend) {
6710bdbc534SSatish Balay       for (j=0; j<n; j++) {
6720bdbc534SSatish Balay         col = in[j];
6730bdbc534SSatish Balay 
6740bdbc534SSatish Balay         /* Look up into the Hash Table */
675c2760754SSatish Balay         key = row*Nbs+col+1;
676c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6770bdbc534SSatish Balay 
678c2760754SSatish Balay         idx = h1;
679aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
680187ce0cbSSatish Balay         total_ct++;
681187ce0cbSSatish Balay         insert_ct++;
682187ce0cbSSatish Balay        if (HT[idx] != key) {
683187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
684187ce0cbSSatish Balay           if (idx == size) {
685187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
686187ce0cbSSatish Balay             if (idx == h1) {
687a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
688187ce0cbSSatish Balay             }
689187ce0cbSSatish Balay           }
690187ce0cbSSatish Balay         }
691187ce0cbSSatish Balay #else
692c2760754SSatish Balay         if (HT[idx] != key) {
693c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
694c2760754SSatish Balay           if (idx == size) {
695c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
696c2760754SSatish Balay             if (idx == h1) {
697a45adfd6SMatthew Knepley               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
698c2760754SSatish Balay             }
699c2760754SSatish Balay           }
700c2760754SSatish Balay         }
701187ce0cbSSatish Balay #endif
702c2760754SSatish Balay         baij_a = HD[idx];
7030bdbc534SSatish Balay         if (roworiented) {
704c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
705187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
706187ce0cbSSatish Balay           value = v_t;
707187ce0cbSSatish Balay           v_t  += bs;
708fef45726SSatish Balay           if (addv == ADD_VALUES) {
709c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
710c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
711fef45726SSatish Balay                 baij_a[jj]  += *value++;
712b4cc0f5aSSatish Balay               }
713b4cc0f5aSSatish Balay             }
714fef45726SSatish Balay           } else {
715c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
716c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
717fef45726SSatish Balay                 baij_a[jj]  = *value++;
718fef45726SSatish Balay               }
719fef45726SSatish Balay             }
720fef45726SSatish Balay           }
7210bdbc534SSatish Balay         } else {
7220bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
723fef45726SSatish Balay           if (addv == ADD_VALUES) {
724b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7250bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
726fef45726SSatish Balay                 baij_a[jj]  += *value++;
727fef45726SSatish Balay               }
728fef45726SSatish Balay             }
729fef45726SSatish Balay           } else {
730fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
731fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
732fef45726SSatish Balay                 baij_a[jj]  = *value++;
733fef45726SSatish Balay               }
734b4cc0f5aSSatish Balay             }
7350bdbc534SSatish Balay           }
7360bdbc534SSatish Balay         }
7370bdbc534SSatish Balay       }
7380bdbc534SSatish Balay     } else {
7390bdbc534SSatish Balay       if (!baij->donotstash) {
7400bdbc534SSatish Balay         if (roworiented) {
7418798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7420bdbc534SSatish Balay         } else {
7438798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7440bdbc534SSatish Balay         }
7450bdbc534SSatish Balay       }
7460bdbc534SSatish Balay     }
7470bdbc534SSatish Balay   }
748aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
749187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
750187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
751187ce0cbSSatish Balay #endif
7520bdbc534SSatish Balay   PetscFunctionReturn(0);
7530bdbc534SSatish Balay }
754133cdb44SSatish Balay 
7554a2ae208SSatish Balay #undef __FUNCT__
7564a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
757f15d580aSBarry Smith int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
758d6de1c52SSatish Balay {
759d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
760d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
76148e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
762d6de1c52SSatish Balay 
763133cdb44SSatish Balay   PetscFunctionBegin;
764d6de1c52SSatish Balay   for (i=0; i<m; i++) {
765590ac198SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
766590ac198SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
767d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
768d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
769d6de1c52SSatish Balay       for (j=0; j<n; j++) {
770590ac198SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
771590ac198SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
772d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
773d6de1c52SSatish Balay           col = idxn[j] - bscstart;
77498dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
775d64ed03dSBarry Smith         } else {
776905e6a2fSBarry Smith           if (!baij->colmap) {
777905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
778905e6a2fSBarry Smith           }
779aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7800f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
781fa46199cSSatish Balay           data --;
78248e59246SSatish Balay #else
78348e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
78448e59246SSatish Balay #endif
78548e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
786d9d09a02SSatish Balay           else {
78748e59246SSatish Balay             col  = data + idxn[j]%bs;
78898dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
789d6de1c52SSatish Balay           }
790d6de1c52SSatish Balay         }
791d6de1c52SSatish Balay       }
792d64ed03dSBarry Smith     } else {
79329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
794d6de1c52SSatish Balay     }
795d6de1c52SSatish Balay   }
7963a40ed3dSBarry Smith  PetscFunctionReturn(0);
797d6de1c52SSatish Balay }
798d6de1c52SSatish Balay 
7994a2ae208SSatish Balay #undef __FUNCT__
8004a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
801064f8208SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
802d6de1c52SSatish Balay {
803d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
804d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
805acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
806329f5518SBarry Smith   PetscReal  sum = 0.0;
8073eda8832SBarry Smith   MatScalar  *v;
808d6de1c52SSatish Balay 
809d64ed03dSBarry Smith   PetscFunctionBegin;
810d6de1c52SSatish Balay   if (baij->size == 1) {
811064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
812d6de1c52SSatish Balay   } else {
813d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
814d6de1c52SSatish Balay       v = amat->a;
815d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
816aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
817329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
818d6de1c52SSatish Balay #else
819d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
820d6de1c52SSatish Balay #endif
821d6de1c52SSatish Balay       }
822d6de1c52SSatish Balay       v = bmat->a;
823d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
824aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
825329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
826d6de1c52SSatish Balay #else
827d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
828d6de1c52SSatish Balay #endif
829d6de1c52SSatish Balay       }
830064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
831064f8208SBarry Smith       *nrm = sqrt(*nrm);
832d64ed03dSBarry Smith     } else {
83329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
834d6de1c52SSatish Balay     }
835d64ed03dSBarry Smith   }
8363a40ed3dSBarry Smith   PetscFunctionReturn(0);
837d6de1c52SSatish Balay }
83857b952d6SSatish Balay 
839bd7f49f5SSatish Balay 
840fef45726SSatish Balay /*
841fef45726SSatish Balay   Creates the hash table, and sets the table
842fef45726SSatish Balay   This table is created only once.
843fef45726SSatish Balay   If new entried need to be added to the matrix
844fef45726SSatish Balay   then the hash table has to be destroyed and
845fef45726SSatish Balay   recreated.
846fef45726SSatish Balay */
8474a2ae208SSatish Balay #undef __FUNCT__
8484a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
849329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850596b8d2eSBarry Smith {
851596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
852596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
853596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8540bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
855549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
856187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
857fef45726SSatish Balay   int         *HT,key;
8583eda8832SBarry Smith   MatScalar   **HD;
859329f5518SBarry Smith   PetscReal   tmp;
860aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8614a15367fSSatish Balay   int         ct=0,max=0;
8624a15367fSSatish Balay #endif
863fef45726SSatish Balay 
864d64ed03dSBarry Smith   PetscFunctionBegin;
8650bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8660bdbc534SSatish Balay   size = baij->ht_size;
867fef45726SSatish Balay 
8680bdbc534SSatish Balay   if (baij->ht) {
8690bdbc534SSatish Balay     PetscFunctionReturn(0);
870596b8d2eSBarry Smith   }
8710bdbc534SSatish Balay 
872fef45726SSatish Balay   /* Allocate Memory for Hash Table */
873b0a32e0cSBarry Smith   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
874b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
875b9e4cc15SSatish Balay   HD       = baij->hd;
876a07cd24cSSatish Balay   HT       = baij->ht;
877b9e4cc15SSatish Balay 
878b9e4cc15SSatish Balay 
87987828ca2SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8800bdbc534SSatish Balay 
881596b8d2eSBarry Smith 
882596b8d2eSBarry Smith   /* Loop Over A */
8830bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
884596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8850bdbc534SSatish Balay       row = i+rstart;
8860bdbc534SSatish Balay       col = aj[j]+cstart;
887596b8d2eSBarry Smith 
888187ce0cbSSatish Balay       key = row*Nbs + col + 1;
889c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8900bdbc534SSatish Balay       for (k=0; k<size; k++){
8910bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8920bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8930bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
894596b8d2eSBarry Smith           break;
895aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
896187ce0cbSSatish Balay         } else {
897187ce0cbSSatish Balay           ct++;
898187ce0cbSSatish Balay #endif
899596b8d2eSBarry Smith         }
900187ce0cbSSatish Balay       }
901aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
902187ce0cbSSatish Balay       if (k> max) max = k;
903187ce0cbSSatish Balay #endif
904596b8d2eSBarry Smith     }
905596b8d2eSBarry Smith   }
906596b8d2eSBarry Smith   /* Loop Over B */
9070bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
908596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9090bdbc534SSatish Balay       row = i+rstart;
9100bdbc534SSatish Balay       col = garray[bj[j]];
911187ce0cbSSatish Balay       key = row*Nbs + col + 1;
912c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9130bdbc534SSatish Balay       for (k=0; k<size; k++){
9140bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
9150bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9160bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
917596b8d2eSBarry Smith           break;
918aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
919187ce0cbSSatish Balay         } else {
920187ce0cbSSatish Balay           ct++;
921187ce0cbSSatish Balay #endif
922596b8d2eSBarry Smith         }
923187ce0cbSSatish Balay       }
924aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
925187ce0cbSSatish Balay       if (k> max) max = k;
926187ce0cbSSatish Balay #endif
927596b8d2eSBarry Smith     }
928596b8d2eSBarry Smith   }
929596b8d2eSBarry Smith 
930596b8d2eSBarry Smith   /* Print Summary */
931aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
932c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
933596b8d2eSBarry Smith     if (HT[i]) {j++;}
934c38d4ed2SBarry Smith   }
935b0a32e0cSBarry Smith   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
936187ce0cbSSatish Balay #endif
9373a40ed3dSBarry Smith   PetscFunctionReturn(0);
938596b8d2eSBarry Smith }
93957b952d6SSatish Balay 
9404a2ae208SSatish Balay #undef __FUNCT__
9414a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
942bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
943bbb85fb3SSatish Balay {
944bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
945ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
946bbb85fb3SSatish Balay   InsertMode  addv;
947bbb85fb3SSatish Balay 
948bbb85fb3SSatish Balay   PetscFunctionBegin;
949bbb85fb3SSatish Balay   if (baij->donotstash) {
950bbb85fb3SSatish Balay     PetscFunctionReturn(0);
951bbb85fb3SSatish Balay   }
952bbb85fb3SSatish Balay 
953bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
954bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
955bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
959bbb85fb3SSatish Balay 
9608798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9618798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9628798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
963b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
96446680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
965b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
966bbb85fb3SSatish Balay   PetscFunctionReturn(0);
967bbb85fb3SSatish Balay }
968bbb85fb3SSatish Balay 
9694a2ae208SSatish Balay #undef __FUNCT__
9704a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
971bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
972bbb85fb3SSatish Balay {
973bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
974a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
975a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9767c922b88SBarry Smith   int         *row,*col,other_disassembled;
9777c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9783eda8832SBarry Smith   MatScalar   *val;
979bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
980bbb85fb3SSatish Balay 
981bbb85fb3SSatish Balay   PetscFunctionBegin;
982bbb85fb3SSatish Balay   if (!baij->donotstash) {
983a2d1c673SSatish Balay     while (1) {
9848798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
985a2d1c673SSatish Balay       if (!flg) break;
986a2d1c673SSatish Balay 
987bbb85fb3SSatish Balay       for (i=0; i<n;) {
988bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
989bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
990bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
991bbb85fb3SSatish Balay         else       ncols = n-i;
992bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
994bbb85fb3SSatish Balay         i = j;
995bbb85fb3SSatish Balay       }
996bbb85fb3SSatish Balay     }
9978798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
998a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
999a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1000a2d1c673SSatish Balay        restore the original flags */
1001a2d1c673SSatish Balay     r1 = baij->roworiented;
1002a2d1c673SSatish Balay     r2 = a->roworiented;
1003a2d1c673SSatish Balay     r3 = b->roworiented;
10047c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10057c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10067c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1007a2d1c673SSatish Balay     while (1) {
10088798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1009a2d1c673SSatish Balay       if (!flg) break;
1010a2d1c673SSatish Balay 
1011a2d1c673SSatish Balay       for (i=0; i<n;) {
1012a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1013a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1014a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1015a2d1c673SSatish Balay         else       ncols = n-i;
101693fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1017a2d1c673SSatish Balay         i = j;
1018a2d1c673SSatish Balay       }
1019a2d1c673SSatish Balay     }
10208798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1021a2d1c673SSatish Balay     baij->roworiented = r1;
1022a2d1c673SSatish Balay     a->roworiented    = r2;
1023a2d1c673SSatish Balay     b->roworiented    = r3;
1024bbb85fb3SSatish Balay   }
1025bbb85fb3SSatish Balay 
1026bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1027bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1028bbb85fb3SSatish Balay 
1029bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1030bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1031bbb85fb3SSatish Balay   /*
1032bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1033bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1034bbb85fb3SSatish Balay   */
1035bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1036bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1037bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1038bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1039bbb85fb3SSatish Balay     }
1040bbb85fb3SSatish Balay   }
1041bbb85fb3SSatish Balay 
1042bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1043bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1044bbb85fb3SSatish Balay   }
1045bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1046bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1047bbb85fb3SSatish Balay 
1048aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1049bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1050f6275e2eSBarry Smith     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1051bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1052bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1053bbb85fb3SSatish Balay   }
1054bbb85fb3SSatish Balay #endif
1055bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1056bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1057bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1058bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1059bbb85fb3SSatish Balay   }
1060bbb85fb3SSatish Balay 
1061606d414cSSatish Balay   if (baij->rowvalues) {
1062606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1063606d414cSSatish Balay     baij->rowvalues = 0;
1064606d414cSSatish Balay   }
1065bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1066bbb85fb3SSatish Balay }
106757b952d6SSatish Balay 
10684a2ae208SSatish Balay #undef __FUNCT__
10694a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1070b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107157b952d6SSatish Balay {
107257b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1073fb9695e5SSatish Balay   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
10746831982aSBarry Smith   PetscTruth        isascii,isdraw;
1075b0a32e0cSBarry Smith   PetscViewer       sviewer;
1076f3ef73ceSBarry Smith   PetscViewerFormat format;
107757b952d6SSatish Balay 
1078d64ed03dSBarry Smith   PetscFunctionBegin;
1079f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1080b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1081fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
10820f5bd95cSBarry Smith   if (isascii) {
1083b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1084456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
10854e220ebcSLois Curfman McInnes       MatInfo info;
1086d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1087d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1088b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1089273d9f13SBarry Smith               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10906831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1091d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1092b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1093d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1094b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1095b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
109657b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10973a40ed3dSBarry Smith       PetscFunctionReturn(0);
1098fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1099b0a32e0cSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
11003a40ed3dSBarry Smith       PetscFunctionReturn(0);
110104929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
110204929863SHong Zhang       PetscFunctionReturn(0);
110357b952d6SSatish Balay     }
110457b952d6SSatish Balay   }
110557b952d6SSatish Balay 
11060f5bd95cSBarry Smith   if (isdraw) {
1107b0a32e0cSBarry Smith     PetscDraw       draw;
110857b952d6SSatish Balay     PetscTruth isnull;
1109b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1110b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
111157b952d6SSatish Balay   }
111257b952d6SSatish Balay 
111357b952d6SSatish Balay   if (size == 1) {
1114873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
111557b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1116d64ed03dSBarry Smith   } else {
111757b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
111857b952d6SSatish Balay     Mat         A;
111957b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1120273d9f13SBarry Smith     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11213eda8832SBarry Smith     MatScalar   *a;
112257b952d6SSatish Balay 
1123f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1124f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
112557b952d6SSatish Balay     if (!rank) {
1126f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1127d64ed03dSBarry Smith     } else {
1128f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
112957b952d6SSatish Balay     }
1130f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1131f204ca49SKris Buschelman     ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1132b0a32e0cSBarry Smith     PetscLogObjectParent(mat,A);
113357b952d6SSatish Balay 
113457b952d6SSatish Balay     /* copy over the A part */
113557b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
113657b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
113782502324SSatish Balay     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
113857b952d6SSatish Balay 
113957b952d6SSatish Balay     for (i=0; i<mbs; i++) {
114057b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
114157b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
114257b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
114357b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
114457b952d6SSatish Balay         for (k=0; k<bs; k++) {
114593fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1146cee3aa6bSSatish Balay           col++; a += bs;
114757b952d6SSatish Balay         }
114857b952d6SSatish Balay       }
114957b952d6SSatish Balay     }
115057b952d6SSatish Balay     /* copy over the B part */
115157b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
115257b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
115357b952d6SSatish Balay     for (i=0; i<mbs; i++) {
115457b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
115557b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115657b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
115757b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
115857b952d6SSatish Balay         for (k=0; k<bs; k++) {
115993fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1160cee3aa6bSSatish Balay           col++; a += bs;
116157b952d6SSatish Balay         }
116257b952d6SSatish Balay       }
116357b952d6SSatish Balay     }
1164606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11656d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11666d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
116755843e3eSBarry Smith     /*
116855843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1169b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
117055843e3eSBarry Smith     */
1171b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1172f14a1c24SBarry Smith     if (!rank) {
1173e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
11746831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
117557b952d6SSatish Balay     }
1176b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
117757b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
117857b952d6SSatish Balay   }
11793a40ed3dSBarry Smith   PetscFunctionReturn(0);
118057b952d6SSatish Balay }
118157b952d6SSatish Balay 
11824a2ae208SSatish Balay #undef __FUNCT__
11834a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1184b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
118557b952d6SSatish Balay {
118657b952d6SSatish Balay   int        ierr;
11876831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
118857b952d6SSatish Balay 
1189d64ed03dSBarry Smith   PetscFunctionBegin;
1190b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1191fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1192b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1193fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
11940f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11957b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11965cd90555SBarry Smith   } else {
119729bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
119857b952d6SSatish Balay   }
11993a40ed3dSBarry Smith   PetscFunctionReturn(0);
120057b952d6SSatish Balay }
120157b952d6SSatish Balay 
12024a2ae208SSatish Balay #undef __FUNCT__
12034a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1204e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
120579bdfe76SSatish Balay {
120679bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
120779bdfe76SSatish Balay   int         ierr;
120879bdfe76SSatish Balay 
1209d64ed03dSBarry Smith   PetscFunctionBegin;
1210aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1211b0a32e0cSBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
121279bdfe76SSatish Balay #endif
12138798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12148798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1215606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
121679bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
121779bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1218aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12190f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
122048e59246SSatish Balay #else
1221606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
122248e59246SSatish Balay #endif
1223606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1224606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1225606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1226606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1227606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1228606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12296fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12306fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12316fa18ffdSBarry Smith #endif
1232606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
12333a40ed3dSBarry Smith   PetscFunctionReturn(0);
123479bdfe76SSatish Balay }
123579bdfe76SSatish Balay 
12364a2ae208SSatish Balay #undef __FUNCT__
12374a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1238ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1239cee3aa6bSSatish Balay {
1240cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
124147b4a8eaSLois Curfman McInnes   int         ierr,nt;
1242cee3aa6bSSatish Balay 
1243d64ed03dSBarry Smith   PetscFunctionBegin;
1244e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1245273d9f13SBarry Smith   if (nt != A->n) {
124629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
124747b4a8eaSLois Curfman McInnes   }
1248e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1249273d9f13SBarry Smith   if (nt != A->m) {
125029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
125147b4a8eaSLois Curfman McInnes   }
125243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1253f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
125443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1255f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
125643a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1258cee3aa6bSSatish Balay }
1259cee3aa6bSSatish Balay 
12604a2ae208SSatish Balay #undef __FUNCT__
12614a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1262ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1263cee3aa6bSSatish Balay {
1264cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1265cee3aa6bSSatish Balay   int        ierr;
1266d64ed03dSBarry Smith 
1267d64ed03dSBarry Smith   PetscFunctionBegin;
126843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1269f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
127043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1273cee3aa6bSSatish Balay }
1274cee3aa6bSSatish Balay 
12754a2ae208SSatish Balay #undef __FUNCT__
12764a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
12777c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1278cee3aa6bSSatish Balay {
1279cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1280cee3aa6bSSatish Balay   int         ierr;
1281cee3aa6bSSatish Balay 
1282d64ed03dSBarry Smith   PetscFunctionBegin;
1283cee3aa6bSSatish Balay   /* do nondiagonal part */
12847c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1285cee3aa6bSSatish Balay   /* send it on its way */
1286537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1287cee3aa6bSSatish Balay   /* do local part */
12887c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1289cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1290cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1291cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1292639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12933a40ed3dSBarry Smith   PetscFunctionReturn(0);
1294cee3aa6bSSatish Balay }
1295cee3aa6bSSatish Balay 
12964a2ae208SSatish Balay #undef __FUNCT__
12974a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
12987c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1299cee3aa6bSSatish Balay {
1300cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1301cee3aa6bSSatish Balay   int         ierr;
1302cee3aa6bSSatish Balay 
1303d64ed03dSBarry Smith   PetscFunctionBegin;
1304cee3aa6bSSatish Balay   /* do nondiagonal part */
13057c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1306cee3aa6bSSatish Balay   /* send it on its way */
1307537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1308cee3aa6bSSatish Balay   /* do local part */
13097c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1310cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1311cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1312cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1313537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1315cee3aa6bSSatish Balay }
1316cee3aa6bSSatish Balay 
1317cee3aa6bSSatish Balay /*
1318cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1319cee3aa6bSSatish Balay    diagonal block
1320cee3aa6bSSatish Balay */
13214a2ae208SSatish Balay #undef __FUNCT__
13224a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1323ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1324cee3aa6bSSatish Balay {
1325cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13263a40ed3dSBarry Smith   int         ierr;
1327d64ed03dSBarry Smith 
1328d64ed03dSBarry Smith   PetscFunctionBegin;
1329273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13303a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1332cee3aa6bSSatish Balay }
1333cee3aa6bSSatish Balay 
13344a2ae208SSatish Balay #undef __FUNCT__
13354a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1336268466fbSBarry Smith int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1337cee3aa6bSSatish Balay {
1338cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1339cee3aa6bSSatish Balay   int         ierr;
1340d64ed03dSBarry Smith 
1341d64ed03dSBarry Smith   PetscFunctionBegin;
1342cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1343cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1345cee3aa6bSSatish Balay }
1346026e39d0SSatish Balay 
13474a2ae208SSatish Balay #undef __FUNCT__
13484a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
134987828ca2SBarry Smith int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1350acdf5bf4SSatish Balay {
1351acdf5bf4SSatish Balay   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
135287828ca2SBarry Smith   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1353acdf5bf4SSatish Balay   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1354d9d09a02SSatish Balay   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1355d9d09a02SSatish Balay   int          *cmap,*idx_p,cstart = mat->cstart;
1356acdf5bf4SSatish Balay 
1357d64ed03dSBarry Smith   PetscFunctionBegin;
135829bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1359acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1360acdf5bf4SSatish Balay 
1361acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1362acdf5bf4SSatish Balay     /*
1363acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1364acdf5bf4SSatish Balay     */
1365acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1366bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1367bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1368acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1369acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1370acdf5bf4SSatish Balay     }
137187828ca2SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1372acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1373acdf5bf4SSatish Balay   }
1374acdf5bf4SSatish Balay 
137529bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1376d9d09a02SSatish Balay   lrow = row - brstart;
1377acdf5bf4SSatish Balay 
1378acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1379acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1380acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1381f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1382f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1383acdf5bf4SSatish Balay   nztot = nzA + nzB;
1384acdf5bf4SSatish Balay 
1385acdf5bf4SSatish Balay   cmap  = mat->garray;
1386acdf5bf4SSatish Balay   if (v  || idx) {
1387acdf5bf4SSatish Balay     if (nztot) {
1388acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1389acdf5bf4SSatish Balay       int imark = -1;
1390acdf5bf4SSatish Balay       if (v) {
1391acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1392acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1393d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1394acdf5bf4SSatish Balay           else break;
1395acdf5bf4SSatish Balay         }
1396acdf5bf4SSatish Balay         imark = i;
1397acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1398acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1399acdf5bf4SSatish Balay       }
1400acdf5bf4SSatish Balay       if (idx) {
1401acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1402acdf5bf4SSatish Balay         if (imark > -1) {
1403acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1404bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1405acdf5bf4SSatish Balay           }
1406acdf5bf4SSatish Balay         } else {
1407acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1408d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1409d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1410acdf5bf4SSatish Balay             else break;
1411acdf5bf4SSatish Balay           }
1412acdf5bf4SSatish Balay           imark = i;
1413acdf5bf4SSatish Balay         }
1414d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1415d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1416acdf5bf4SSatish Balay       }
1417d64ed03dSBarry Smith     } else {
1418d212a18eSSatish Balay       if (idx) *idx = 0;
1419d212a18eSSatish Balay       if (v)   *v   = 0;
1420d212a18eSSatish Balay     }
1421acdf5bf4SSatish Balay   }
1422acdf5bf4SSatish Balay   *nz = nztot;
1423f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1424f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14253a40ed3dSBarry Smith   PetscFunctionReturn(0);
1426acdf5bf4SSatish Balay }
1427acdf5bf4SSatish Balay 
14284a2ae208SSatish Balay #undef __FUNCT__
14294a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
143087828ca2SBarry Smith int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1431acdf5bf4SSatish Balay {
1432acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1433d64ed03dSBarry Smith 
1434d64ed03dSBarry Smith   PetscFunctionBegin;
1435acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
143629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1437acdf5bf4SSatish Balay   }
1438acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14393a40ed3dSBarry Smith   PetscFunctionReturn(0);
1440acdf5bf4SSatish Balay }
1441acdf5bf4SSatish Balay 
14424a2ae208SSatish Balay #undef __FUNCT__
14434a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1444ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14455a838052SSatish Balay {
14465a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1447d64ed03dSBarry Smith 
1448d64ed03dSBarry Smith   PetscFunctionBegin;
14495a838052SSatish Balay   *bs = baij->bs;
14503a40ed3dSBarry Smith   PetscFunctionReturn(0);
14515a838052SSatish Balay }
14525a838052SSatish Balay 
14534a2ae208SSatish Balay #undef __FUNCT__
14544a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1455ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
145658667388SSatish Balay {
145758667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
145858667388SSatish Balay   int         ierr;
1459d64ed03dSBarry Smith 
1460d64ed03dSBarry Smith   PetscFunctionBegin;
146158667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
146258667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14633a40ed3dSBarry Smith   PetscFunctionReturn(0);
146458667388SSatish Balay }
14650ac07820SSatish Balay 
14664a2ae208SSatish Balay #undef __FUNCT__
14674a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1468ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14690ac07820SSatish Balay {
14704e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14714e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14727d57db60SLois Curfman McInnes   int         ierr;
1473329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14740ac07820SSatish Balay 
1475d64ed03dSBarry Smith   PetscFunctionBegin;
1476f6275e2eSBarry Smith   info->block_size     = (PetscReal)a->bs;
14774e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14780e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1479de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14804e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14810e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1482de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14830ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14844e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14854e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14864e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14874e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14884e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14890ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1490d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
14914e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14924e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14934e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14944e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14954e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14960ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1497d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
14984e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14994e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15004e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15014e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15024e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1503d41123aaSBarry Smith   } else {
150429bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15050ac07820SSatish Balay   }
1506f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1507f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1508f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1509f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
15104e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15114e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15124e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15133a40ed3dSBarry Smith   PetscFunctionReturn(0);
15140ac07820SSatish Balay }
15150ac07820SSatish Balay 
15164a2ae208SSatish Balay #undef __FUNCT__
15174a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1518ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
151958667388SSatish Balay {
152058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
152198305bb5SBarry Smith   int         ierr;
152258667388SSatish Balay 
1523d64ed03dSBarry Smith   PetscFunctionBegin;
152412c028f9SKris Buschelman   switch (op) {
152512c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
152612c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
152712c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
152812c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
152912c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
153012c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
153112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
153298305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153398305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
153412c028f9SKris Buschelman     break;
153512c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15367c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
153798305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153898305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
153912c028f9SKris Buschelman     break;
154012c028f9SKris Buschelman   case MAT_ROWS_SORTED:
154112c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
154212c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
1543b0a32e0cSBarry Smith     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
154412c028f9SKris Buschelman     break;
154512c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
15467c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
154798305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154898305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
154912c028f9SKris Buschelman     break;
155012c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15517c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
155212c028f9SKris Buschelman     break;
155312c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
155429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
155512c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15567c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
155712c028f9SKris Buschelman     break;
155877e54ba9SKris Buschelman   case MAT_SYMMETRIC:
155977e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15602188ac68SBarry Smith   case MAT_HERMITIAN:
15612188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
15622188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
15632188ac68SBarry Smith     break;
15649a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
15659a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
15669a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
15679a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
156877e54ba9SKris Buschelman     break;
156912c028f9SKris Buschelman   default:
157029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1571d64ed03dSBarry Smith   }
15723a40ed3dSBarry Smith   PetscFunctionReturn(0);
157358667388SSatish Balay }
157458667388SSatish Balay 
15754a2ae208SSatish Balay #undef __FUNCT__
15764a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1577ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15780ac07820SSatish Balay {
15790ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15800ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15810ac07820SSatish Balay   Mat         B;
1582273d9f13SBarry Smith   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
15830ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15843eda8832SBarry Smith   MatScalar   *a;
15850ac07820SSatish Balay 
1586d64ed03dSBarry Smith   PetscFunctionBegin;
158729bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1588f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1589f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1590f204ca49SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
15910ac07820SSatish Balay 
15920ac07820SSatish Balay   /* copy over the A part */
15930ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15940ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
159582502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
15960ac07820SSatish Balay 
15970ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15980ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15990ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16000ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16010ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16020ac07820SSatish Balay       for (k=0; k<bs; k++) {
160393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16040ac07820SSatish Balay         col++; a += bs;
16050ac07820SSatish Balay       }
16060ac07820SSatish Balay     }
16070ac07820SSatish Balay   }
16080ac07820SSatish Balay   /* copy over the B part */
16090ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16100ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16110ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16120ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16130ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16140ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16150ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16160ac07820SSatish Balay       for (k=0; k<bs; k++) {
161793fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16180ac07820SSatish Balay         col++; a += bs;
16190ac07820SSatish Balay       }
16200ac07820SSatish Balay     }
16210ac07820SSatish Balay   }
1622606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16230ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16240ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16250ac07820SSatish Balay 
16267c922b88SBarry Smith   if (matout) {
16270ac07820SSatish Balay     *matout = B;
16280ac07820SSatish Balay   } else {
1629273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16300ac07820SSatish Balay   }
16313a40ed3dSBarry Smith   PetscFunctionReturn(0);
16320ac07820SSatish Balay }
16330e95ebc0SSatish Balay 
16344a2ae208SSatish Balay #undef __FUNCT__
16354a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
163636c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16370e95ebc0SSatish Balay {
163836c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
163936c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16400e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16410e95ebc0SSatish Balay 
1642d64ed03dSBarry Smith   PetscFunctionBegin;
164336c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
164436c4a09eSSatish Balay   if (rr) {
164536c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
164629bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
164736c4a09eSSatish Balay     /* Overlap communication with computation. */
164836c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
164936c4a09eSSatish Balay   }
16500e95ebc0SSatish Balay   if (ll) {
16510e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
165229bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1653a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16540e95ebc0SSatish Balay   }
165536c4a09eSSatish Balay   /* scale  the diagonal block */
165636c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
165736c4a09eSSatish Balay 
165836c4a09eSSatish Balay   if (rr) {
165936c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
166036c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1661a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
166236c4a09eSSatish Balay   }
166336c4a09eSSatish Balay 
16643a40ed3dSBarry Smith   PetscFunctionReturn(0);
16650e95ebc0SSatish Balay }
16660e95ebc0SSatish Balay 
16674a2ae208SSatish Balay #undef __FUNCT__
16684a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1669268466fbSBarry Smith int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
16700ac07820SSatish Balay {
16710ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16720ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1673c1dc657dSBarry Smith   int            *nprocs,j,idx,nsends,row;
16740ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16750ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1676a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16770ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16780ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16790ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16800ac07820SSatish Balay   IS             istmp;
168135d8aa7fSBarry Smith   PetscTruth     found;
16820ac07820SSatish Balay 
1683d64ed03dSBarry Smith   PetscFunctionBegin;
1684f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
16850ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16860ac07820SSatish Balay 
16870ac07820SSatish Balay   /*  first count number of contributors to each processor */
168882502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1689549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1690b0a32e0cSBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
16910ac07820SSatish Balay   for (i=0; i<N; i++) {
16920ac07820SSatish Balay     idx   = rows[i];
169335d8aa7fSBarry Smith     found = PETSC_FALSE;
16940ac07820SSatish Balay     for (j=0; j<size; j++) {
16950ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1696c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
16970ac07820SSatish Balay       }
16980ac07820SSatish Balay     }
169929bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17000ac07820SSatish Balay   }
1701c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
17020ac07820SSatish Balay 
17030ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1704c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
17050ac07820SSatish Balay 
17060ac07820SSatish Balay   /* post receives:   */
1707b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1708b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
17090ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1710ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17110ac07820SSatish Balay   }
17120ac07820SSatish Balay 
17130ac07820SSatish Balay   /* do sends:
17140ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17150ac07820SSatish Balay      the ith processor
17160ac07820SSatish Balay   */
1717b0a32e0cSBarry Smith   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1718b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1719b0a32e0cSBarry Smith   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
17200ac07820SSatish Balay   starts[0]  = 0;
1721c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17220ac07820SSatish Balay   for (i=0; i<N; i++) {
17230ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17240ac07820SSatish Balay   }
17256831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17260ac07820SSatish Balay 
17270ac07820SSatish Balay   starts[0] = 0;
1728c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17290ac07820SSatish Balay   count = 0;
17300ac07820SSatish Balay   for (i=0; i<size; i++) {
1731c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1732c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17330ac07820SSatish Balay     }
17340ac07820SSatish Balay   }
1735606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17360ac07820SSatish Balay 
17370ac07820SSatish Balay   base = owners[rank]*bs;
17380ac07820SSatish Balay 
17390ac07820SSatish Balay   /*  wait on receives */
1740b0a32e0cSBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
17410ac07820SSatish Balay   source = lens + nrecvs;
17420ac07820SSatish Balay   count  = nrecvs; slen = 0;
17430ac07820SSatish Balay   while (count) {
1744ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17450ac07820SSatish Balay     /* unpack receives into our local space */
1746ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17470ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17480ac07820SSatish Balay     lens[imdex]    = n;
17490ac07820SSatish Balay     slen          += n;
17500ac07820SSatish Balay     count--;
17510ac07820SSatish Balay   }
1752606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17530ac07820SSatish Balay 
17540ac07820SSatish Balay   /* move the data into the send scatter */
1755b0a32e0cSBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
17560ac07820SSatish Balay   count = 0;
17570ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17580ac07820SSatish Balay     values = rvalues + i*nmax;
17590ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17600ac07820SSatish Balay       lrows[count++] = values[j] - base;
17610ac07820SSatish Balay     }
17620ac07820SSatish Balay   }
1763606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1764606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1765606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1766606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17670ac07820SSatish Balay 
17680ac07820SSatish Balay   /* actually zap the local rows */
1769029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1770b0a32e0cSBarry Smith   PetscLogObjectParent(A,istmp);
1771a07cd24cSSatish Balay 
177272dacd9aSBarry Smith   /*
177372dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
177472dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
177572dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
177672dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
177772dacd9aSBarry Smith 
177872dacd9aSBarry Smith        Contributed by: Mathew Knepley
177972dacd9aSBarry Smith   */
17809c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17816fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
17829c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17836fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17849c957beeSSatish Balay   } else if (diag) {
17856fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1786fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
178729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1788fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17896525c446SSatish Balay     }
1790a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1791a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17923f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1793a07cd24cSSatish Balay     }
1794a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17969c957beeSSatish Balay   } else {
17976fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1798a07cd24cSSatish Balay   }
17999c957beeSSatish Balay 
18009c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1801606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1802a07cd24cSSatish Balay 
18030ac07820SSatish Balay   /* wait on sends */
18040ac07820SSatish Balay   if (nsends) {
180582502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1806ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1807606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
18080ac07820SSatish Balay   }
1809606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1810606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18110ac07820SSatish Balay 
18123a40ed3dSBarry Smith   PetscFunctionReturn(0);
18130ac07820SSatish Balay }
181472dacd9aSBarry Smith 
18154a2ae208SSatish Balay #undef __FUNCT__
18164a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1817ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1818ba4ca20aSSatish Balay {
1819ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
182025fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1821133cdb44SSatish Balay   static int  called = 0;
18223a40ed3dSBarry Smith   int         ierr;
1823ba4ca20aSSatish Balay 
1824d64ed03dSBarry Smith   PetscFunctionBegin;
18253a40ed3dSBarry Smith   if (!a->rank) {
18263a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
182725fdafccSSatish Balay   }
182825fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1829d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1830d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1832ba4ca20aSSatish Balay }
18330ac07820SSatish Balay 
18344a2ae208SSatish Balay #undef __FUNCT__
18354a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1836ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1837bb5a7306SBarry Smith {
1838bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1839bb5a7306SBarry Smith   int         ierr;
1840d64ed03dSBarry Smith 
1841d64ed03dSBarry Smith   PetscFunctionBegin;
1842bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18433a40ed3dSBarry Smith   PetscFunctionReturn(0);
1844bb5a7306SBarry Smith }
1845bb5a7306SBarry Smith 
18462e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18470ac07820SSatish Balay 
18484a2ae208SSatish Balay #undef __FUNCT__
18494a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
18507fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18517fc3c18eSBarry Smith {
18527fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18537fc3c18eSBarry Smith   Mat         a,b,c,d;
18547fc3c18eSBarry Smith   PetscTruth  flg;
18557fc3c18eSBarry Smith   int         ierr;
18567fc3c18eSBarry Smith 
18577fc3c18eSBarry Smith   PetscFunctionBegin;
18587fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18597fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18607fc3c18eSBarry Smith 
18617fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18627fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18637fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18647fc3c18eSBarry Smith   }
18657fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18667fc3c18eSBarry Smith   PetscFunctionReturn(0);
18677fc3c18eSBarry Smith }
18687fc3c18eSBarry Smith 
1869273d9f13SBarry Smith 
18704a2ae208SSatish Balay #undef __FUNCT__
18714a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1872273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A)
1873273d9f13SBarry Smith {
1874273d9f13SBarry Smith   int        ierr;
1875273d9f13SBarry Smith 
1876273d9f13SBarry Smith   PetscFunctionBegin;
1877273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1878273d9f13SBarry Smith   PetscFunctionReturn(0);
1879273d9f13SBarry Smith }
1880273d9f13SBarry Smith 
188179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1882cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1883cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1884cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1885cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1886cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
188797304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
18887c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
18897c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1890cc2dc46cSBarry Smith        0,
1891cc2dc46cSBarry Smith        0,
1892cc2dc46cSBarry Smith        0,
189397304618SKris Buschelman /*10*/ 0,
1894cc2dc46cSBarry Smith        0,
1895cc2dc46cSBarry Smith        0,
1896cc2dc46cSBarry Smith        0,
1897cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
189897304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
18997fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1900cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1901cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1902cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
190397304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
1904cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
1905cc2dc46cSBarry Smith        0,
1906cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
1907cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
190897304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
1909cc2dc46cSBarry Smith        0,
1910cc2dc46cSBarry Smith        0,
1911cc2dc46cSBarry Smith        0,
1912cc2dc46cSBarry Smith        0,
191397304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
1914273d9f13SBarry Smith        0,
1915cc2dc46cSBarry Smith        0,
1916cc2dc46cSBarry Smith        0,
1917cc2dc46cSBarry Smith        0,
191897304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
1919cc2dc46cSBarry Smith        0,
1920cc2dc46cSBarry Smith        0,
1921cc2dc46cSBarry Smith        0,
1922cc2dc46cSBarry Smith        0,
192397304618SKris Buschelman /*40*/ 0,
1924cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
1925cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
1926cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
1927cc2dc46cSBarry Smith        0,
192897304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
1929cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
1930cc2dc46cSBarry Smith        0,
1931cc2dc46cSBarry Smith        0,
1932cc2dc46cSBarry Smith        0,
193397304618SKris Buschelman /*50*/ MatGetBlockSize_MPIBAIJ,
1934cc2dc46cSBarry Smith        0,
1935cc2dc46cSBarry Smith        0,
1936cc2dc46cSBarry Smith        0,
1937cc2dc46cSBarry Smith        0,
193897304618SKris Buschelman /*55*/ 0,
1939cc2dc46cSBarry Smith        0,
1940cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
1941cc2dc46cSBarry Smith        0,
1942cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
194397304618SKris Buschelman /*60*/ 0,
1944f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
1945f14a1c24SBarry Smith        MatView_MPIBAIJ,
19468a124369SBarry Smith        MatGetPetscMaps_Petsc,
19477843d17aSBarry Smith        0,
194897304618SKris Buschelman /*65*/ 0,
19497843d17aSBarry Smith        0,
19507843d17aSBarry Smith        0,
19517843d17aSBarry Smith        0,
19527843d17aSBarry Smith        0,
195397304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
19547843d17aSBarry Smith        0,
195597304618SKris Buschelman        0,
195697304618SKris Buschelman        0,
195797304618SKris Buschelman        0,
195897304618SKris Buschelman /*75*/ 0,
195997304618SKris Buschelman        0,
196097304618SKris Buschelman        0,
196197304618SKris Buschelman        0,
196297304618SKris Buschelman        0,
196397304618SKris Buschelman /*80*/ 0,
196497304618SKris Buschelman        0,
196597304618SKris Buschelman        0,
196697304618SKris Buschelman        0,
196797304618SKris Buschelman /*85*/ MatLoad_MPIBAIJ
196897304618SKris Buschelman };
196979bdfe76SSatish Balay 
19705ef9f2a5SBarry Smith 
1971e18c124aSSatish Balay EXTERN_C_BEGIN
19724a2ae208SSatish Balay #undef __FUNCT__
19734a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
19745ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19755ef9f2a5SBarry Smith {
19765ef9f2a5SBarry Smith   PetscFunctionBegin;
19775ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19785ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19795ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19805ef9f2a5SBarry Smith }
1981e18c124aSSatish Balay EXTERN_C_END
198279bdfe76SSatish Balay 
1983273d9f13SBarry Smith EXTERN_C_BEGIN
1984d94109b8SHong Zhang extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1985d94109b8SHong Zhang EXTERN_C_END
1986d94109b8SHong Zhang 
1987d94109b8SHong Zhang EXTERN_C_BEGIN
19884a2ae208SSatish Balay #undef __FUNCT__
1989a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1990a23d5eceSKris Buschelman int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1991a23d5eceSKris Buschelman {
1992a23d5eceSKris Buschelman   Mat_MPIBAIJ  *b;
1993a23d5eceSKris Buschelman   int          ierr,i;
1994a23d5eceSKris Buschelman 
1995a23d5eceSKris Buschelman   PetscFunctionBegin;
1996a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
1997a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1998a23d5eceSKris Buschelman 
1999a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2000a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2001a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2002a23d5eceSKris Buschelman   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2003a23d5eceSKris Buschelman   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2004a23d5eceSKris Buschelman   if (d_nnz) {
2005a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
2006a23d5eceSKris Buschelman       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2007a23d5eceSKris Buschelman     }
2008a23d5eceSKris Buschelman   }
2009a23d5eceSKris Buschelman   if (o_nnz) {
2010a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
2011a23d5eceSKris Buschelman       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2012a23d5eceSKris Buschelman     }
2013a23d5eceSKris Buschelman   }
2014a23d5eceSKris Buschelman 
2015a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2016a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2017a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2018a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2019a23d5eceSKris Buschelman 
2020a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2021a23d5eceSKris Buschelman   b->bs  = bs;
2022a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2023a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2024a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2025a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2026a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2027a23d5eceSKris Buschelman 
2028a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2029a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2030a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2031a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2032a23d5eceSKris Buschelman   }
2033a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2034a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2035a23d5eceSKris Buschelman 
2036a23d5eceSKris Buschelman   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2037a23d5eceSKris Buschelman   b->cowners[0] = 0;
2038a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2039a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2040a23d5eceSKris Buschelman   }
2041a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2042a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2043a23d5eceSKris Buschelman 
2044a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2045a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2046a23d5eceSKris Buschelman   }
2047a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2048a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2049a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2050a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2051a23d5eceSKris Buschelman 
20529c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
20539c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2054c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
20559c097c71SKris Buschelman   PetscLogObjectParent(B,b->A);
20569c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
20579c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2058c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
20599c097c71SKris Buschelman   PetscLogObjectParent(B,b->B);
2060c60e587dSKris Buschelman 
2061a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2062a23d5eceSKris Buschelman 
2063a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2064a23d5eceSKris Buschelman }
2065a23d5eceSKris Buschelman EXTERN_C_END
2066a23d5eceSKris Buschelman 
2067a23d5eceSKris Buschelman EXTERN_C_BEGIN
206892b32695SKris Buschelman EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
20695bf65638SKris Buschelman EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
207092b32695SKris Buschelman EXTERN_C_END
20715bf65638SKris Buschelman 
20720bad9183SKris Buschelman /*MC
2073fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
20740bad9183SKris Buschelman 
20750bad9183SKris Buschelman    Options Database Keys:
20760bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
20770bad9183SKris Buschelman 
20780bad9183SKris Buschelman   Level: beginner
20790bad9183SKris Buschelman 
20800bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
20810bad9183SKris Buschelman M*/
20820bad9183SKris Buschelman 
208392b32695SKris Buschelman EXTERN_C_BEGIN
2084a23d5eceSKris Buschelman #undef __FUNCT__
20854a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2086273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B)
2087273d9f13SBarry Smith {
2088273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
2089cfce73b9SSatish Balay   int          ierr;
2090273d9f13SBarry Smith   PetscTruth   flg;
2091273d9f13SBarry Smith 
2092273d9f13SBarry Smith   PetscFunctionBegin;
209382502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
209482502324SSatish Balay   B->data = (void*)b;
209582502324SSatish Balay 
2096273d9f13SBarry Smith   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2097273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2098273d9f13SBarry Smith   B->mapping    = 0;
2099273d9f13SBarry Smith   B->factor     = 0;
2100273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2101273d9f13SBarry Smith 
2102273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2103273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2104273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2105273d9f13SBarry Smith 
2106273d9f13SBarry Smith   /* build local table of row and column ownerships */
210782502324SSatish Balay   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2108b0a32e0cSBarry Smith   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2109273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2110273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2111273d9f13SBarry Smith 
2112273d9f13SBarry Smith   /* build cache for off array entries formed */
2113273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2114273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2115273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2116273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2117273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2118273d9f13SBarry Smith 
2119cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2120273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
212164a35ccbSBarry Smith   b->setvalueslen     = 0;
2122273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2123273d9f13SBarry Smith #endif
2124273d9f13SBarry Smith 
2125273d9f13SBarry Smith   /* stuff used in block assembly */
2126273d9f13SBarry Smith   b->barray       = 0;
2127273d9f13SBarry Smith 
2128273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2129273d9f13SBarry Smith   b->lvec         = 0;
2130273d9f13SBarry Smith   b->Mvctx        = 0;
2131273d9f13SBarry Smith 
2132273d9f13SBarry Smith   /* stuff for MatGetRow() */
2133273d9f13SBarry Smith   b->rowindices   = 0;
2134273d9f13SBarry Smith   b->rowvalues    = 0;
2135273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2136273d9f13SBarry Smith 
2137273d9f13SBarry Smith   /* hash table stuff */
2138273d9f13SBarry Smith   b->ht           = 0;
2139273d9f13SBarry Smith   b->hd           = 0;
2140273d9f13SBarry Smith   b->ht_size      = 0;
2141273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2142273d9f13SBarry Smith   b->ht_fact      = 0;
2143273d9f13SBarry Smith   b->ht_total_ct  = 0;
2144273d9f13SBarry Smith   b->ht_insert_ct = 0;
2145273d9f13SBarry Smith 
2146b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2147273d9f13SBarry Smith   if (flg) {
2148f6275e2eSBarry Smith     PetscReal fact = 1.39;
2149273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
215087828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2151273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2152273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2153b0a32e0cSBarry Smith     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2154273d9f13SBarry Smith   }
2155273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2156273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2157273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2158273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2159273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2160273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2161273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2162273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2163273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2164a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2165a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2166a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
216792b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
216892b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
216992b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
21705bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
21715bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
21725bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2173273d9f13SBarry Smith   PetscFunctionReturn(0);
2174273d9f13SBarry Smith }
2175273d9f13SBarry Smith EXTERN_C_END
2176273d9f13SBarry Smith 
2177209238afSKris Buschelman /*MC
2178002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2179209238afSKris Buschelman 
2180209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2181209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2182209238afSKris Buschelman 
2183209238afSKris Buschelman    Options Database Keys:
2184209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2185209238afSKris Buschelman 
2186209238afSKris Buschelman   Level: beginner
2187209238afSKris Buschelman 
2188209238afSKris Buschelman .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2189209238afSKris Buschelman M*/
2190209238afSKris Buschelman 
2191209238afSKris Buschelman EXTERN_C_BEGIN
2192209238afSKris Buschelman #undef __FUNCT__
2193209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2194209238afSKris Buschelman int MatCreate_BAIJ(Mat A) {
2195209238afSKris Buschelman   int ierr,size;
2196209238afSKris Buschelman 
2197209238afSKris Buschelman   PetscFunctionBegin;
2198209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2199209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2200209238afSKris Buschelman   if (size == 1) {
2201209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2202209238afSKris Buschelman   } else {
2203209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2204209238afSKris Buschelman   }
2205209238afSKris Buschelman   PetscFunctionReturn(0);
2206209238afSKris Buschelman }
2207209238afSKris Buschelman EXTERN_C_END
2208209238afSKris Buschelman 
22094a2ae208SSatish Balay #undef __FUNCT__
22104a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2211273d9f13SBarry Smith /*@C
2212273d9f13SBarry Smith    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2213273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2214273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2215273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2216273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2217273d9f13SBarry Smith 
2218273d9f13SBarry Smith    Collective on Mat
2219273d9f13SBarry Smith 
2220273d9f13SBarry Smith    Input Parameters:
2221273d9f13SBarry Smith +  A - the matrix
2222273d9f13SBarry Smith .  bs   - size of blockk
2223273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2224273d9f13SBarry Smith            submatrix  (same for all local rows)
2225273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2226273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2227273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2228273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2229273d9f13SBarry Smith            submatrix (same for all local rows).
2230273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2231273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2232273d9f13SBarry Smith            each block row) or PETSC_NULL.
2233273d9f13SBarry Smith 
2234273d9f13SBarry Smith    Output Parameter:
2235273d9f13SBarry Smith 
2236273d9f13SBarry Smith 
2237273d9f13SBarry Smith    Options Database Keys:
2238273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2239273d9f13SBarry Smith                      block calculations (much slower)
2240273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2241273d9f13SBarry Smith 
2242273d9f13SBarry Smith    Notes:
2243273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2244273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2245273d9f13SBarry Smith 
2246273d9f13SBarry Smith    Storage Information:
2247273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2248273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2249273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2250273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2251273d9f13SBarry Smith 
2252273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2253273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2254273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2255273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2256273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2257273d9f13SBarry Smith 
2258273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2259273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2260273d9f13SBarry Smith 
2261273d9f13SBarry Smith .vb
2262273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2263273d9f13SBarry Smith           -------------------
2264273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2265273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2266273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2267273d9f13SBarry Smith           -------------------
2268273d9f13SBarry Smith .ve
2269273d9f13SBarry Smith 
2270273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2271273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2272273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2273273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2274273d9f13SBarry Smith 
2275273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2276273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2277273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2278273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2279273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2280273d9f13SBarry Smith    matrices.
2281273d9f13SBarry Smith 
2282273d9f13SBarry Smith    Level: intermediate
2283273d9f13SBarry Smith 
2284273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2285273d9f13SBarry Smith 
2286273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2287273d9f13SBarry Smith @*/
2288ca01db9bSBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2289273d9f13SBarry Smith {
2290ca01db9bSBarry Smith   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2291273d9f13SBarry Smith 
2292273d9f13SBarry Smith   PetscFunctionBegin;
2293a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2294a23d5eceSKris Buschelman   if (f) {
2295a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2296273d9f13SBarry Smith   }
2297273d9f13SBarry Smith   PetscFunctionReturn(0);
2298273d9f13SBarry Smith }
2299273d9f13SBarry Smith 
23004a2ae208SSatish Balay #undef __FUNCT__
23014a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
230279bdfe76SSatish Balay /*@C
230379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
230479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
230579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
230679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
230779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
230879bdfe76SSatish Balay 
2309db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2310db81eaa0SLois Curfman McInnes 
231179bdfe76SSatish Balay    Input Parameters:
2312db81eaa0SLois Curfman McInnes +  comm - MPI communicator
231379bdfe76SSatish Balay .  bs   - size of blockk
231479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
231592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
231692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
231792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
231892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
231992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2320be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2321be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
232247a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
232379bdfe76SSatish Balay            submatrix  (same for all local rows)
232447a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
232592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2326db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
232747a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
232879bdfe76SSatish Balay            submatrix (same for all local rows).
232947a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
233092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
233192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
233279bdfe76SSatish Balay 
233379bdfe76SSatish Balay    Output Parameter:
233479bdfe76SSatish Balay .  A - the matrix
233579bdfe76SSatish Balay 
2336db81eaa0SLois Curfman McInnes    Options Database Keys:
2337db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2338db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2339db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
23403ffaccefSLois Curfman McInnes 
2341b259b22eSLois Curfman McInnes    Notes:
234247a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
234347a75d0bSBarry Smith 
234479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
234579bdfe76SSatish Balay    (possibly both).
234679bdfe76SSatish Balay 
2347be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2348be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2349be79a94dSBarry Smith 
235079bdfe76SSatish Balay    Storage Information:
235179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
235279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
235379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
235479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
235579bdfe76SSatish Balay 
235679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
235779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
235879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
235979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
236079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
236179bdfe76SSatish Balay 
236279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
236379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
236479bdfe76SSatish Balay 
2365db81eaa0SLois Curfman McInnes .vb
2366db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2367db81eaa0SLois Curfman McInnes           -------------------
2368db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2369db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2370db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2371db81eaa0SLois Curfman McInnes           -------------------
2372db81eaa0SLois Curfman McInnes .ve
237379bdfe76SSatish Balay 
237479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
237579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
237679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
237757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
237879bdfe76SSatish Balay 
2379d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2380d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
238179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
238292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
238392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
23846da5968aSLois Curfman McInnes    matrices.
238579bdfe76SSatish Balay 
2386027ccd11SLois Curfman McInnes    Level: intermediate
2387027ccd11SLois Curfman McInnes 
238892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
238979bdfe76SSatish Balay 
23903eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
239179bdfe76SSatish Balay @*/
2392ca01db9bSBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
239379bdfe76SSatish Balay {
2394273d9f13SBarry Smith   int ierr,size;
239579bdfe76SSatish Balay 
2396d64ed03dSBarry Smith   PetscFunctionBegin;
2397273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2398d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2399273d9f13SBarry Smith   if (size > 1) {
2400273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2401273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2402273d9f13SBarry Smith   } else {
2403273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2404273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
24053914022bSBarry Smith   }
24063a40ed3dSBarry Smith   PetscFunctionReturn(0);
240779bdfe76SSatish Balay }
2408026e39d0SSatish Balay 
24094a2ae208SSatish Balay #undef __FUNCT__
24104a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
24112e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
24120ac07820SSatish Balay {
24130ac07820SSatish Balay   Mat         mat;
24140ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2415f1af5d2fSBarry Smith   int         ierr,len=0;
24160ac07820SSatish Balay 
2417d64ed03dSBarry Smith   PetscFunctionBegin;
24180ac07820SSatish Balay   *newmat       = 0;
2419273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2420be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2421*1d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
24227fff6886SHong Zhang 
24234beb1cfeSHong Zhang   mat->factor       = matin->factor;
2424273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
24250ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
24267fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24277fff6886SHong Zhang 
2428273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
24290ac07820SSatish Balay   a->bs  = oldmat->bs;
24300ac07820SSatish Balay   a->bs2 = oldmat->bs2;
24310ac07820SSatish Balay   a->mbs = oldmat->mbs;
24320ac07820SSatish Balay   a->nbs = oldmat->nbs;
24330ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
24340ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
24350ac07820SSatish Balay 
24360ac07820SSatish Balay   a->rstart       = oldmat->rstart;
24370ac07820SSatish Balay   a->rend         = oldmat->rend;
24380ac07820SSatish Balay   a->cstart       = oldmat->cstart;
24390ac07820SSatish Balay   a->cend         = oldmat->cend;
24400ac07820SSatish Balay   a->size         = oldmat->size;
24410ac07820SSatish Balay   a->rank         = oldmat->rank;
2442aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2443aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2444aef5e8e0SSatish Balay   a->rowindices   = 0;
24450ac07820SSatish Balay   a->rowvalues    = 0;
24460ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
244730793edcSSatish Balay   a->barray       = 0;
24483123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
24493123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
24503123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
24513123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
24520ac07820SSatish Balay 
2453133cdb44SSatish Balay   /* hash table stuff */
2454133cdb44SSatish Balay   a->ht           = 0;
2455133cdb44SSatish Balay   a->hd           = 0;
2456133cdb44SSatish Balay   a->ht_size      = 0;
2457133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
245825fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2459133cdb44SSatish Balay   a->ht_total_ct  = 0;
2460133cdb44SSatish Balay   a->ht_insert_ct = 0;
2461133cdb44SSatish Balay 
2462549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
24638798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
24648798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
24650ac07820SSatish Balay   if (oldmat->colmap) {
2466aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
24670f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
246848e59246SSatish Balay #else
246982502324SSatish Balay   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2470b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2471549d3d68SSatish Balay   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
247248e59246SSatish Balay #endif
24730ac07820SSatish Balay   } else a->colmap = 0;
24744beb1cfeSHong Zhang 
24750ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
247682502324SSatish Balay     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2477b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,len*sizeof(int));
2478549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
24790ac07820SSatish Balay   } else a->garray = 0;
24800ac07820SSatish Balay 
24810ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2482b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->lvec);
24830ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2484b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->Mvctx);
24857fff6886SHong Zhang 
24862e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2487b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->A);
24882e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2489b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->B);
2490b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
24910ac07820SSatish Balay   *newmat = mat;
24924beb1cfeSHong Zhang 
24933a40ed3dSBarry Smith   PetscFunctionReturn(0);
24940ac07820SSatish Balay }
249557b952d6SSatish Balay 
2496e090d566SSatish Balay #include "petscsys.h"
249757b952d6SSatish Balay 
24984a2ae208SSatish Balay #undef __FUNCT__
24994a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
25008e9aea5cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
250157b952d6SSatish Balay {
250257b952d6SSatish Balay   Mat          A;
250357b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
250487828ca2SBarry Smith   PetscScalar  *vals,*buf;
250557b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
250657b952d6SSatish Balay   MPI_Status   status;
2507cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
250857b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2509f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
251057b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
251157b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
251257b952d6SSatish Balay 
2513d64ed03dSBarry Smith   PetscFunctionBegin;
2514b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
251557b952d6SSatish Balay 
2516d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2517d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251857b952d6SSatish Balay   if (!rank) {
2519b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2520e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2521552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2522d64ed03dSBarry Smith     if (header[3] < 0) {
252329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2524d64ed03dSBarry Smith     }
25256c5fab8fSBarry Smith   }
2526d64ed03dSBarry Smith 
2527ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
252857b952d6SSatish Balay   M = header[1]; N = header[2];
252957b952d6SSatish Balay 
253029bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
253157b952d6SSatish Balay 
253257b952d6SSatish Balay   /*
253357b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
253457b952d6SSatish Balay      divisible by the blocksize
253557b952d6SSatish Balay   */
253657b952d6SSatish Balay   Mbs        = M/bs;
253757b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
253857b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
253957b952d6SSatish Balay   else                  Mbs++;
254057b952d6SSatish Balay   if (extra_rows &&!rank) {
2541b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
254257b952d6SSatish Balay   }
2543537820f0SBarry Smith 
254457b952d6SSatish Balay   /* determine ownership of all rows */
254557b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
254657b952d6SSatish Balay   m          = mbs*bs;
2547b0a32e0cSBarry Smith   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2548cee3aa6bSSatish Balay   browners   = rowners + size + 1;
2549ca161407SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
255057b952d6SSatish Balay   rowners[0] = 0;
2551cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2552cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
255357b952d6SSatish Balay   rstart = rowners[rank];
255457b952d6SSatish Balay   rend   = rowners[rank+1];
255557b952d6SSatish Balay 
255657b952d6SSatish Balay   /* distribute row lengths to all processors */
255782502324SSatish Balay   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
255857b952d6SSatish Balay   if (!rank) {
2559b0a32e0cSBarry Smith     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2560e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
256157b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
256282502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2563cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2564ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2565606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2566d64ed03dSBarry Smith   } else {
2567ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
256857b952d6SSatish Balay   }
256957b952d6SSatish Balay 
257057b952d6SSatish Balay   if (!rank) {
257157b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
257282502324SSatish Balay     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2573549d3d68SSatish Balay     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
257457b952d6SSatish Balay     for (i=0; i<size; i++) {
257557b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
257657b952d6SSatish Balay         procsnz[i] += rowlengths[j];
257757b952d6SSatish Balay       }
257857b952d6SSatish Balay     }
2579606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
258057b952d6SSatish Balay 
258157b952d6SSatish Balay     /* determine max buffer needed and allocate it */
258257b952d6SSatish Balay     maxnz = 0;
258357b952d6SSatish Balay     for (i=0; i<size; i++) {
258457b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
258557b952d6SSatish Balay     }
258682502324SSatish Balay     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
258757b952d6SSatish Balay 
258857b952d6SSatish Balay     /* read in my part of the matrix column indices  */
258957b952d6SSatish Balay     nz     = procsnz[0];
259082502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
259157b952d6SSatish Balay     mycols = ibuf;
2592cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2593e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2594cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2595cee3aa6bSSatish Balay 
259657b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
259757b952d6SSatish Balay     for (i=1; i<size-1; i++) {
259857b952d6SSatish Balay       nz   = procsnz[i];
2599e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2600ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
260157b952d6SSatish Balay     }
260257b952d6SSatish Balay     /* read in the stuff for the last proc */
260357b952d6SSatish Balay     if (size != 1) {
260457b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2605e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
260657b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2607ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
260857b952d6SSatish Balay     }
2609606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2610d64ed03dSBarry Smith   } else {
261157b952d6SSatish Balay     /* determine buffer space needed for message */
261257b952d6SSatish Balay     nz = 0;
261357b952d6SSatish Balay     for (i=0; i<m; i++) {
261457b952d6SSatish Balay       nz += locrowlens[i];
261557b952d6SSatish Balay     }
261682502324SSatish Balay     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
261757b952d6SSatish Balay     mycols = ibuf;
261857b952d6SSatish Balay     /* receive message of column indices*/
2619ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2620ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
262129bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
262257b952d6SSatish Balay   }
262357b952d6SSatish Balay 
262457b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
262582502324SSatish Balay   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2626cee3aa6bSSatish Balay   odlens   = dlens + (rend-rstart);
262782502324SSatish Balay   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2628549d3d68SSatish Balay   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
262957b952d6SSatish Balay   masked1  = mask    + Mbs;
263057b952d6SSatish Balay   masked2  = masked1 + Mbs;
263157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
263257b952d6SSatish Balay   for (i=0; i<mbs; i++) {
263357b952d6SSatish Balay     dcount  = 0;
263457b952d6SSatish Balay     odcount = 0;
263557b952d6SSatish Balay     for (j=0; j<bs; j++) {
263657b952d6SSatish Balay       kmax = locrowlens[rowcount];
263757b952d6SSatish Balay       for (k=0; k<kmax; k++) {
263857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
263957b952d6SSatish Balay         if (!mask[tmp]) {
264057b952d6SSatish Balay           mask[tmp] = 1;
264157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
264257b952d6SSatish Balay           else masked1[dcount++] = tmp;
264357b952d6SSatish Balay         }
264457b952d6SSatish Balay       }
264557b952d6SSatish Balay       rowcount++;
264657b952d6SSatish Balay     }
2647cee3aa6bSSatish Balay 
264857b952d6SSatish Balay     dlens[i]  = dcount;
264957b952d6SSatish Balay     odlens[i] = odcount;
2650cee3aa6bSSatish Balay 
265157b952d6SSatish Balay     /* zero out the mask elements we set */
265257b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
265357b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
265457b952d6SSatish Balay   }
2655cee3aa6bSSatish Balay 
265657b952d6SSatish Balay   /* create our matrix */
265778ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
265878ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
265978ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
266078ae41b4SKris Buschelman 
266178ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
26626d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
266357b952d6SSatish Balay 
266457b952d6SSatish Balay   if (!rank) {
266587828ca2SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
266657b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
266757b952d6SSatish Balay     nz = procsnz[0];
266857b952d6SSatish Balay     vals = buf;
2669cee3aa6bSSatish Balay     mycols = ibuf;
2670cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2671e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2672cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2673537820f0SBarry Smith 
267457b952d6SSatish Balay     /* insert into matrix */
267557b952d6SSatish Balay     jj      = rstart*bs;
267657b952d6SSatish Balay     for (i=0; i<m; i++) {
2677b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
267857b952d6SSatish Balay       mycols += locrowlens[i];
267957b952d6SSatish Balay       vals   += locrowlens[i];
268057b952d6SSatish Balay       jj++;
268157b952d6SSatish Balay     }
268257b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
268357b952d6SSatish Balay     for (i=1; i<size-1; i++) {
268457b952d6SSatish Balay       nz   = procsnz[i];
268557b952d6SSatish Balay       vals = buf;
2686e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2687ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
268857b952d6SSatish Balay     }
268957b952d6SSatish Balay     /* the last proc */
269057b952d6SSatish Balay     if (size != 1){
269157b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2692cee3aa6bSSatish Balay       vals = buf;
2693e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
269457b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2695ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
269657b952d6SSatish Balay     }
2697606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2698d64ed03dSBarry Smith   } else {
269957b952d6SSatish Balay     /* receive numeric values */
270087828ca2SBarry Smith     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
270157b952d6SSatish Balay 
270257b952d6SSatish Balay     /* receive message of values*/
270357b952d6SSatish Balay     vals   = buf;
2704cee3aa6bSSatish Balay     mycols = ibuf;
2705ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2706ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
270729bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
270857b952d6SSatish Balay 
270957b952d6SSatish Balay     /* insert into matrix */
271057b952d6SSatish Balay     jj      = rstart*bs;
2711cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2712b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
271357b952d6SSatish Balay       mycols += locrowlens[i];
271457b952d6SSatish Balay       vals   += locrowlens[i];
271557b952d6SSatish Balay       jj++;
271657b952d6SSatish Balay     }
271757b952d6SSatish Balay   }
2718606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2719606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2720606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2721606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2722606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2723606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
27246d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
27256d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
272678ae41b4SKris Buschelman 
272778ae41b4SKris Buschelman   *newmat = A;
27283a40ed3dSBarry Smith   PetscFunctionReturn(0);
272957b952d6SSatish Balay }
273057b952d6SSatish Balay 
27314a2ae208SSatish Balay #undef __FUNCT__
27324a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2733133cdb44SSatish Balay /*@
2734133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2735133cdb44SSatish Balay 
2736133cdb44SSatish Balay    Input Parameters:
2737133cdb44SSatish Balay .  mat  - the matrix
2738133cdb44SSatish Balay .  fact - factor
2739133cdb44SSatish Balay 
2740fee21e36SBarry Smith    Collective on Mat
2741fee21e36SBarry Smith 
27428c890885SBarry Smith    Level: advanced
27438c890885SBarry Smith 
2744133cdb44SSatish Balay   Notes:
2745133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2746133cdb44SSatish Balay 
2747133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2748133cdb44SSatish Balay 
2749133cdb44SSatish Balay .seealso: MatSetOption()
2750133cdb44SSatish Balay @*/
2751329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2752133cdb44SSatish Balay {
27535bf65638SKris Buschelman   int ierr,(*f)(Mat,PetscReal);
27545bf65638SKris Buschelman 
27555bf65638SKris Buschelman   PetscFunctionBegin;
27565bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
27575bf65638SKris Buschelman   if (f) {
27585bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
27595bf65638SKris Buschelman   }
27605bf65638SKris Buschelman   PetscFunctionReturn(0);
27615bf65638SKris Buschelman }
27625bf65638SKris Buschelman 
27635bf65638SKris Buschelman #undef __FUNCT__
27645bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
27655bf65638SKris Buschelman int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
27665bf65638SKris Buschelman {
276725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2768133cdb44SSatish Balay 
2769133cdb44SSatish Balay   PetscFunctionBegin;
2770133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2771133cdb44SSatish Balay   baij->ht_fact = fact;
2772133cdb44SSatish Balay   PetscFunctionReturn(0);
2773133cdb44SSatish Balay }
2774f2a5309cSSatish Balay 
27754a2ae208SSatish Balay #undef __FUNCT__
27764a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2777268466fbSBarry Smith int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2778f2a5309cSSatish Balay {
2779f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2780f2a5309cSSatish Balay   PetscFunctionBegin;
2781f2a5309cSSatish Balay   *Ad     = a->A;
2782f2a5309cSSatish Balay   *Ao     = a->B;
2783195d93cdSBarry Smith   *colmap = a->garray;
2784f2a5309cSSatish Balay   PetscFunctionReturn(0);
2785f2a5309cSSatish Balay }
2786