xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1*b0a32e0cSBarry Smith /*$Id: mpibaij.c,v 1.207 2000/11/28 17:29:23 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
6ca44d042SBarry Smith EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7ca44d042SBarry Smith EXTERN int DisAssemble_MPIBAIJ(Mat);
8ca44d042SBarry Smith EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9ca44d042SBarry Smith EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10ca44d042SBarry Smith EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11ca44d042SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13ca44d042SBarry Smith EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14ca44d042SBarry Smith EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15ca44d042SBarry Smith EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16ca44d042SBarry Smith EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);
1793fea6afSBarry Smith 
1893fea6afSBarry Smith /*  UGLY, ugly, ugly
1993fea6afSBarry Smith    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
2093fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2193fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2293fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2393fea6afSBarry Smith    into the single precision data structures.
2493fea6afSBarry Smith */
2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
26ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29ca44d042SBarry Smith EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
3193fea6afSBarry Smith #else
326fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
356fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
366fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3793fea6afSBarry Smith #endif
383b2fbd54SBarry Smith 
397843d17aSBarry Smith #undef __FUNC__
40*b0a32e0cSBarry Smith #define __FUNC__ "MatGetRowMax_MPIBAIJ"
417843d17aSBarry Smith int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
427843d17aSBarry Smith {
437843d17aSBarry Smith   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
447843d17aSBarry Smith   int          ierr,i;
45273d9f13SBarry Smith   Scalar       *va,*vb;
467843d17aSBarry Smith   Vec          vtmp;
477843d17aSBarry Smith 
487843d17aSBarry Smith   PetscFunctionBegin;
497843d17aSBarry Smith 
507843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51273d9f13SBarry Smith   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
527843d17aSBarry Smith 
53273d9f13SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRA(ierr);
547843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55273d9f13SBarry Smith   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
567843d17aSBarry Smith 
57273d9f13SBarry Smith   for (i=0; i<A->m; i++){
58273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
597843d17aSBarry Smith   }
607843d17aSBarry Smith 
617843d17aSBarry Smith   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
627843d17aSBarry Smith   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
637843d17aSBarry Smith   ierr = VecDestroy(vtmp);CHKERRA(ierr);
647843d17aSBarry Smith 
657843d17aSBarry Smith   PetscFunctionReturn(0);
667843d17aSBarry Smith }
677843d17aSBarry Smith 
687fc3c18eSBarry Smith EXTERN_C_BEGIN
697fc3c18eSBarry Smith #undef __FUNC__
70*b0a32e0cSBarry Smith #define __FUNC__ "MatStoreValues_MPIBAIJ"
717fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
727fc3c18eSBarry Smith {
737fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
747fc3c18eSBarry Smith   int         ierr;
757fc3c18eSBarry Smith 
767fc3c18eSBarry Smith   PetscFunctionBegin;
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
787fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
797fc3c18eSBarry Smith   PetscFunctionReturn(0);
807fc3c18eSBarry Smith }
817fc3c18eSBarry Smith EXTERN_C_END
827fc3c18eSBarry Smith 
837fc3c18eSBarry Smith EXTERN_C_BEGIN
847fc3c18eSBarry Smith #undef __FUNC__
85*b0a32e0cSBarry Smith #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
867fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
877fc3c18eSBarry Smith {
887fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
897fc3c18eSBarry Smith   int         ierr;
907fc3c18eSBarry Smith 
917fc3c18eSBarry Smith   PetscFunctionBegin;
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
937fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
947fc3c18eSBarry Smith   PetscFunctionReturn(0);
957fc3c18eSBarry Smith }
967fc3c18eSBarry Smith EXTERN_C_END
977fc3c18eSBarry Smith 
98537820f0SBarry Smith /*
99537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10157b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10257b952d6SSatish Balay    length of colmap equals the global matrix length.
10357b952d6SSatish Balay */
1045615d1e5SSatish Balay #undef __FUNC__
105*b0a32e0cSBarry Smith #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
10657b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
10757b952d6SSatish Balay {
10857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
10957b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
11157b952d6SSatish Balay 
112d64ed03dSBarry Smith   PetscFunctionBegin;
113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
114f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11548e59246SSatish Balay   for (i=0; i<nbs; i++){
1160f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11748e59246SSatish Balay   }
11848e59246SSatish Balay #else
119*b0a32e0cSBarry Smith   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKERRQ(ierr);
120*b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12348e59246SSatish Balay #endif
1243a40ed3dSBarry Smith   PetscFunctionReturn(0);
12557b952d6SSatish Balay }
12657b952d6SSatish Balay 
12780c1aa95SSatish Balay #define CHUNKSIZE  10
12880c1aa95SSatish Balay 
129f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13080c1aa95SSatish Balay { \
13180c1aa95SSatish Balay  \
13280c1aa95SSatish Balay     brow = row/bs;  \
13380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13580c1aa95SSatish Balay       bcol = col/bs; \
13680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
137ab26458aSBarry Smith       low = 0; high = nrow; \
138ab26458aSBarry Smith       while (high-low > 3) { \
139ab26458aSBarry Smith         t = (low+high)/2; \
140ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
141ab26458aSBarry Smith         else              low  = t; \
142ab26458aSBarry Smith       } \
143ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
148eada6651SSatish Balay           else                    *bap  = value;  \
149ac7a638eSSatish Balay           goto a_noinsert; \
15080c1aa95SSatish Balay         } \
15180c1aa95SSatish Balay       } \
15289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
15329bbc08cSBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
15480c1aa95SSatish Balay       if (nrow >= rmax) { \
15580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15680c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1573eda8832SBarry Smith         MatScalar *new_a; \
15880c1aa95SSatish Balay  \
15929bbc08cSBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
16089280ab3SLois Curfman McInnes  \
16180c1aa95SSatish Balay         /* malloc new storage space */ \
1623eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
163*b0a32e0cSBarry Smith ierr = PetscMalloc(len,&(        new_a   ));CHKERRQ(ierr); \
16480c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
16580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16680c1aa95SSatish Balay  \
16780c1aa95SSatish Balay         /* copy over old data into new slots */ \
16880c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16980c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
17180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1723eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1733eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
175549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1763eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17780c1aa95SSatish Balay         /* free up old matrix storage */ \
178606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179606d414cSSatish Balay         if (!a->singlemalloc) { \
180606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182606d414cSSatish Balay         } \
18380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1847c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18580c1aa95SSatish Balay  \
18680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188*b0a32e0cSBarry Smith         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
18980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19080c1aa95SSatish Balay         a->reallocs++; \
19180c1aa95SSatish Balay         a->nz++; \
19280c1aa95SSatish Balay       } \
19380c1aa95SSatish Balay       N = nrow++ - 1;  \
19480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19580c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1973eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19880c1aa95SSatish Balay       } \
1993eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
202ac7a638eSSatish Balay       a_noinsert:; \
20380c1aa95SSatish Balay     ailen[brow] = nrow; \
20480c1aa95SSatish Balay }
20557b952d6SSatish Balay 
206ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207ac7a638eSSatish Balay { \
208ac7a638eSSatish Balay     brow = row/bs;  \
209ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
211ac7a638eSSatish Balay       bcol = col/bs; \
212ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
213ac7a638eSSatish Balay       low = 0; high = nrow; \
214ac7a638eSSatish Balay       while (high-low > 3) { \
215ac7a638eSSatish Balay         t = (low+high)/2; \
216ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
217ac7a638eSSatish Balay         else              low  = t; \
218ac7a638eSSatish Balay       } \
219ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
220ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
221ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
222ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
224ac7a638eSSatish Balay           else                    *bap  = value;  \
225ac7a638eSSatish Balay           goto b_noinsert; \
226ac7a638eSSatish Balay         } \
227ac7a638eSSatish Balay       } \
22889280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
22929bbc08cSBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
230ac7a638eSSatish Balay       if (nrow >= rmax) { \
231ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
23274c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2333eda8832SBarry Smith         MatScalar *new_a; \
234ac7a638eSSatish Balay  \
23529bbc08cSBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
23689280ab3SLois Curfman McInnes  \
237ac7a638eSSatish Balay         /* malloc new storage space */ \
2383eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
239*b0a32e0cSBarry Smith ierr = PetscMalloc(len,&(        new_a   ));CHKERRQ(ierr); \
240ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
241ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
242ac7a638eSSatish Balay  \
243ac7a638eSSatish Balay         /* copy over old data into new slots */ \
244ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24574c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2483eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2523eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253ac7a638eSSatish Balay         /* free up old matrix storage */ \
254606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255606d414cSSatish Balay         if (!b->singlemalloc) { \
256606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258606d414cSSatish Balay         } \
25974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2607c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
261ac7a638eSSatish Balay  \
262ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264*b0a32e0cSBarry Smith         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
26574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26674c639caSSatish Balay         b->reallocs++; \
26774c639caSSatish Balay         b->nz++; \
268ac7a638eSSatish Balay       } \
269ac7a638eSSatish Balay       N = nrow++ - 1;  \
270ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
271ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
272ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2733eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274ac7a638eSSatish Balay       } \
2753eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
277ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
278ac7a638eSSatish Balay       b_noinsert:; \
279ac7a638eSSatish Balay     bilen[brow] = nrow; \
280ac7a638eSSatish Balay }
281ac7a638eSSatish Balay 
28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2835615d1e5SSatish Balay #undef __FUNC__
284*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValues_MPIBAIJ"
285ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
28657b952d6SSatish Balay {
2876fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
28893fea6afSBarry Smith   int         ierr,i,N = m*n;
2896fa18ffdSBarry Smith   MatScalar   *vsingle;
29093fea6afSBarry Smith 
29193fea6afSBarry Smith   PetscFunctionBegin;
2926fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2936fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294*b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&(    b->setvaluescopy ));CHKERRQ(ierr);
2956fa18ffdSBarry Smith     b->setvalueslen  = N;
2966fa18ffdSBarry Smith   }
2976fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2986fa18ffdSBarry Smith 
29993fea6afSBarry Smith   for (i=0; i<N; i++) {
30093fea6afSBarry Smith     vsingle[i] = v[i];
30193fea6afSBarry Smith   }
30293fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30393fea6afSBarry Smith   PetscFunctionReturn(0);
30493fea6afSBarry Smith }
30593fea6afSBarry Smith 
30693fea6afSBarry Smith #undef __FUNC__
307*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
30893fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
30993fea6afSBarry Smith {
3106fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3116fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3126fa18ffdSBarry Smith   MatScalar   *vsingle;
31393fea6afSBarry Smith 
31493fea6afSBarry Smith   PetscFunctionBegin;
3156fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3166fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317*b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&(    b->setvaluescopy ));CHKERRQ(ierr);
3186fa18ffdSBarry Smith     b->setvalueslen  = N;
3196fa18ffdSBarry Smith   }
3206fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32193fea6afSBarry Smith   for (i=0; i<N; i++) {
32293fea6afSBarry Smith     vsingle[i] = v[i];
32393fea6afSBarry Smith   }
32493fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32593fea6afSBarry Smith   PetscFunctionReturn(0);
32693fea6afSBarry Smith }
3276fa18ffdSBarry Smith 
3286fa18ffdSBarry Smith #undef __FUNC__
329*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3306fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3316fa18ffdSBarry Smith {
3326fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3336fa18ffdSBarry Smith   int         ierr,i,N = m*n;
3346fa18ffdSBarry Smith   MatScalar   *vsingle;
3356fa18ffdSBarry Smith 
3366fa18ffdSBarry Smith   PetscFunctionBegin;
3376fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3386fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339*b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&(    b->setvaluescopy ));CHKERRQ(ierr);
3406fa18ffdSBarry Smith     b->setvalueslen  = N;
3416fa18ffdSBarry Smith   }
3426fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3436fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3446fa18ffdSBarry Smith     vsingle[i] = v[i];
3456fa18ffdSBarry Smith   }
3466fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3476fa18ffdSBarry Smith   PetscFunctionReturn(0);
3486fa18ffdSBarry Smith }
3496fa18ffdSBarry Smith 
3506fa18ffdSBarry Smith #undef __FUNC__
351*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
3526fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3536fa18ffdSBarry Smith {
3546fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
3556fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
3566fa18ffdSBarry Smith   MatScalar   *vsingle;
3576fa18ffdSBarry Smith 
3586fa18ffdSBarry Smith   PetscFunctionBegin;
3596fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3606fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361*b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&(    b->setvaluescopy ));CHKERRQ(ierr);
3626fa18ffdSBarry Smith     b->setvalueslen  = N;
3636fa18ffdSBarry Smith   }
3646fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3656fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3666fa18ffdSBarry Smith     vsingle[i] = v[i];
3676fa18ffdSBarry Smith   }
3686fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3696fa18ffdSBarry Smith   PetscFunctionReturn(0);
3706fa18ffdSBarry Smith }
37193fea6afSBarry Smith #endif
37293fea6afSBarry Smith 
37393fea6afSBarry Smith #undef __FUNC__
374*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValues_MPIBAIJ"
37593fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
37693fea6afSBarry Smith {
37757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
37893fea6afSBarry Smith   MatScalar   value;
379273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
3804fa0d573SSatish Balay   int         ierr,i,j,row,col;
381273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
3824fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3834fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
38457b952d6SSatish Balay 
385eada6651SSatish Balay   /* Some Variables required in the macro */
38680c1aa95SSatish Balay   Mat         A = baij->A;
38780c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3893eda8832SBarry Smith   MatScalar   *aa=a->a;
390ac7a638eSSatish Balay 
391ac7a638eSSatish Balay   Mat         B = baij->B;
392ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3943eda8832SBarry Smith   MatScalar   *ba=b->a;
395ac7a638eSSatish Balay 
396ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
397ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3983eda8832SBarry Smith   MatScalar   *ap,*bap;
39980c1aa95SSatish Balay 
400d64ed03dSBarry Smith   PetscFunctionBegin;
40157b952d6SSatish Balay   for (i=0; i<m; i++) {
4025ef9f2a5SBarry Smith     if (im[i] < 0) continue;
403aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
404273d9f13SBarry Smith     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
405639f9d9dSBarry Smith #endif
40657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
40757b952d6SSatish Balay       row = im[i] - rstart_orig;
40857b952d6SSatish Balay       for (j=0; j<n; j++) {
40957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
41057b952d6SSatish Balay           col = in[j] - cstart_orig;
41157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41380c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41473959e64SBarry Smith         } else if (in[j] < 0) continue;
415aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
416273d9f13SBarry Smith         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
417639f9d9dSBarry Smith #endif
41857b952d6SSatish Balay         else {
41957b952d6SSatish Balay           if (mat->was_assembled) {
420905e6a2fSBarry Smith             if (!baij->colmap) {
421905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
422905e6a2fSBarry Smith             }
423aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4240f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
425fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
42648e59246SSatish Balay #else
427905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
42848e59246SSatish Balay #endif
42957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
43157b952d6SSatish Balay               col =  in[j];
4329bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4339bf004c3SSatish Balay               B = baij->B;
4349bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4359bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4369bf004c3SSatish Balay               ba=b->a;
43757b952d6SSatish Balay             }
438d64ed03dSBarry Smith           } else col = in[j];
43957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
440ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
441ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44257b952d6SSatish Balay         }
44357b952d6SSatish Balay       }
444d64ed03dSBarry Smith     } else {
44590f02eecSBarry Smith       if (!baij->donotstash) {
446ff2fd236SBarry Smith         if (roworiented) {
4476fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448ff2fd236SBarry Smith         } else {
4496fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
45057b952d6SSatish Balay         }
45157b952d6SSatish Balay       }
45257b952d6SSatish Balay     }
45390f02eecSBarry Smith   }
4543a40ed3dSBarry Smith   PetscFunctionReturn(0);
45557b952d6SSatish Balay }
45657b952d6SSatish Balay 
457ab26458aSBarry Smith #undef __FUNC__
458*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
45993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
460ab26458aSBarry Smith {
461ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
46293fea6afSBarry Smith   MatScalar   *value,*barray=baij->barray;
463273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
464273d9f13SBarry Smith   int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
465ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
466ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
467ab26458aSBarry Smith 
468b16ae2b1SBarry Smith   PetscFunctionBegin;
46930793edcSSatish Balay   if(!barray) {
470*b0a32e0cSBarry Smith     baij->barray =ierr = PetscMalloc(bs2*sizeof(MatScalar),&( barray ));CHKERRQ(ierr);
47130793edcSSatish Balay   }
47230793edcSSatish Balay 
473ab26458aSBarry Smith   if (roworiented) {
474ab26458aSBarry Smith     stepval = (n-1)*bs;
475ab26458aSBarry Smith   } else {
476ab26458aSBarry Smith     stepval = (m-1)*bs;
477ab26458aSBarry Smith   }
478ab26458aSBarry Smith   for (i=0; i<m; i++) {
4795ef9f2a5SBarry Smith     if (im[i] < 0) continue;
480aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
48129bbc08cSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
482ab26458aSBarry Smith #endif
483ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
484ab26458aSBarry Smith       row = im[i] - rstart;
485ab26458aSBarry Smith       for (j=0; j<n; j++) {
48615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
48715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
48815b57d14SSatish Balay           barray = v + i*bs2;
48915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
49015b57d14SSatish Balay           barray = v + j*bs2;
49115b57d14SSatish Balay         } else { /* Here a copy is required */
492ab26458aSBarry Smith           if (roworiented) {
493ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
494ab26458aSBarry Smith           } else {
495ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
496abef11f7SSatish Balay           }
49747513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
49847513183SBarry Smith             for (jj=0; jj<bs; jj++) {
49930793edcSSatish Balay               *barray++  = *value++;
50047513183SBarry Smith             }
50147513183SBarry Smith           }
50230793edcSSatish Balay           barray -=bs2;
50315b57d14SSatish Balay         }
504abef11f7SSatish Balay 
505abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
506abef11f7SSatish Balay           col  = in[j] - cstart;
50793fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
508ab26458aSBarry Smith         }
5095ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
510aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
51129bbc08cSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
512ab26458aSBarry Smith #endif
513ab26458aSBarry Smith         else {
514ab26458aSBarry Smith           if (mat->was_assembled) {
515ab26458aSBarry Smith             if (!baij->colmap) {
516ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
517ab26458aSBarry Smith             }
518a5eb4965SSatish Balay 
519aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
520aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
521fa46199cSSatish Balay             { int data;
5220f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
52329bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
524fa46199cSSatish Balay             }
52548e59246SSatish Balay #else
52629bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
527a5eb4965SSatish Balay #endif
52848e59246SSatish Balay #endif
529aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5300f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
531fa46199cSSatish Balay             col  = (col - 1)/bs;
53248e59246SSatish Balay #else
533a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
53448e59246SSatish Balay #endif
535ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
536ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
537ab26458aSBarry Smith               col =  in[j];
538ab26458aSBarry Smith             }
539ab26458aSBarry Smith           }
540ab26458aSBarry Smith           else col = in[j];
54193fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
542ab26458aSBarry Smith         }
543ab26458aSBarry Smith       }
544d64ed03dSBarry Smith     } else {
545ab26458aSBarry Smith       if (!baij->donotstash) {
546ff2fd236SBarry Smith         if (roworiented) {
5476fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
548ff2fd236SBarry Smith         } else {
5496fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550ff2fd236SBarry Smith         }
551abef11f7SSatish Balay       }
552ab26458aSBarry Smith     }
553ab26458aSBarry Smith   }
5543a40ed3dSBarry Smith   PetscFunctionReturn(0);
555ab26458aSBarry Smith }
5566fa18ffdSBarry Smith 
5570bdbc534SSatish Balay #define HASH_KEY 0.6180339887
558c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
5596fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
560c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5615615d1e5SSatish Balay #undef __FUNC__
562*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValues_MPIBAIJ_HT_MatScalar"
5636fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
5640bdbc534SSatish Balay {
5650bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
566273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
5670bdbc534SSatish Balay   int         ierr,i,j,row,col;
568273d9f13SBarry Smith   int         rstart_orig=baij->rstart_bs;
569c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571329f5518SBarry Smith   PetscReal   tmp;
5723eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
573aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5744a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5754a15367fSSatish Balay #endif
5760bdbc534SSatish Balay 
5770bdbc534SSatish Balay   PetscFunctionBegin;
5780bdbc534SSatish Balay 
5790bdbc534SSatish Balay   for (i=0; i<m; i++) {
580aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
58129bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
582273d9f13SBarry Smith     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
5830bdbc534SSatish Balay #endif
5840bdbc534SSatish Balay       row = im[i];
585c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5860bdbc534SSatish Balay       for (j=0; j<n; j++) {
5870bdbc534SSatish Balay         col = in[j];
5886fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5890bdbc534SSatish Balay         /* Look up into the Hash Table */
590c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5920bdbc534SSatish Balay 
593c2760754SSatish Balay 
594c2760754SSatish Balay         idx = h1;
595aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
596187ce0cbSSatish Balay         insert_ct++;
597187ce0cbSSatish Balay         total_ct++;
598187ce0cbSSatish Balay         if (HT[idx] != key) {
599187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600187ce0cbSSatish Balay           if (idx == size) {
601187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602187ce0cbSSatish Balay             if (idx == h1) {
60329bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
604187ce0cbSSatish Balay             }
605187ce0cbSSatish Balay           }
606187ce0cbSSatish Balay         }
607187ce0cbSSatish Balay #else
608c2760754SSatish Balay         if (HT[idx] != key) {
609c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610c2760754SSatish Balay           if (idx == size) {
611c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612c2760754SSatish Balay             if (idx == h1) {
61329bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
614c2760754SSatish Balay             }
615c2760754SSatish Balay           }
616c2760754SSatish Balay         }
617187ce0cbSSatish Balay #endif
618c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6210bdbc534SSatish Balay       }
6220bdbc534SSatish Balay     } else {
6230bdbc534SSatish Balay       if (!baij->donotstash) {
624ff2fd236SBarry Smith         if (roworiented) {
6258798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626ff2fd236SBarry Smith         } else {
6278798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6280bdbc534SSatish Balay         }
6290bdbc534SSatish Balay       }
6300bdbc534SSatish Balay     }
6310bdbc534SSatish Balay   }
632aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
633187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
634187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
635187ce0cbSSatish Balay #endif
6360bdbc534SSatish Balay   PetscFunctionReturn(0);
6370bdbc534SSatish Balay }
6380bdbc534SSatish Balay 
6390bdbc534SSatish Balay #undef __FUNC__
640*b0a32e0cSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
6416fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
6420bdbc534SSatish Balay {
6430bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644273d9f13SBarry Smith   PetscTruth  roworiented = baij->roworiented;
6458798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
646273d9f13SBarry Smith   int         rstart=baij->rstart ;
647b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
648c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
649329f5518SBarry Smith   PetscReal   tmp;
6503eda8832SBarry Smith   MatScalar   **HD = baij->hd,*baij_a;
6516fa18ffdSBarry Smith   MatScalar   *v_t,*value;
652aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6534a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6544a15367fSSatish Balay #endif
6550bdbc534SSatish Balay 
656d0a41580SSatish Balay   PetscFunctionBegin;
657d0a41580SSatish Balay 
6580bdbc534SSatish Balay   if (roworiented) {
6590bdbc534SSatish Balay     stepval = (n-1)*bs;
6600bdbc534SSatish Balay   } else {
6610bdbc534SSatish Balay     stepval = (m-1)*bs;
6620bdbc534SSatish Balay   }
6630bdbc534SSatish Balay   for (i=0; i<m; i++) {
664aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
66529bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
66629bbc08cSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
6670bdbc534SSatish Balay #endif
6680bdbc534SSatish Balay     row   = im[i];
669187ce0cbSSatish Balay     v_t   = v + i*bs2;
670c2760754SSatish Balay     if (row >= rstart && row < rend) {
6710bdbc534SSatish Balay       for (j=0; j<n; j++) {
6720bdbc534SSatish Balay         col = in[j];
6730bdbc534SSatish Balay 
6740bdbc534SSatish Balay         /* Look up into the Hash Table */
675c2760754SSatish Balay         key = row*Nbs+col+1;
676c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6770bdbc534SSatish Balay 
678c2760754SSatish Balay         idx = h1;
679aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
680187ce0cbSSatish Balay         total_ct++;
681187ce0cbSSatish Balay         insert_ct++;
682187ce0cbSSatish Balay        if (HT[idx] != key) {
683187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
684187ce0cbSSatish Balay           if (idx == size) {
685187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
686187ce0cbSSatish Balay             if (idx == h1) {
68729bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
688187ce0cbSSatish Balay             }
689187ce0cbSSatish Balay           }
690187ce0cbSSatish Balay         }
691187ce0cbSSatish Balay #else
692c2760754SSatish Balay         if (HT[idx] != key) {
693c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
694c2760754SSatish Balay           if (idx == size) {
695c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
696c2760754SSatish Balay             if (idx == h1) {
69729bbc08cSBarry Smith               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
698c2760754SSatish Balay             }
699c2760754SSatish Balay           }
700c2760754SSatish Balay         }
701187ce0cbSSatish Balay #endif
702c2760754SSatish Balay         baij_a = HD[idx];
7030bdbc534SSatish Balay         if (roworiented) {
704c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
705187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
706187ce0cbSSatish Balay           value = v_t;
707187ce0cbSSatish Balay           v_t  += bs;
708fef45726SSatish Balay           if (addv == ADD_VALUES) {
709c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
710c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
711fef45726SSatish Balay                 baij_a[jj]  += *value++;
712b4cc0f5aSSatish Balay               }
713b4cc0f5aSSatish Balay             }
714fef45726SSatish Balay           } else {
715c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
716c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
717fef45726SSatish Balay                 baij_a[jj]  = *value++;
718fef45726SSatish Balay               }
719fef45726SSatish Balay             }
720fef45726SSatish Balay           }
7210bdbc534SSatish Balay         } else {
7220bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
723fef45726SSatish Balay           if (addv == ADD_VALUES) {
724b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7250bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
726fef45726SSatish Balay                 baij_a[jj]  += *value++;
727fef45726SSatish Balay               }
728fef45726SSatish Balay             }
729fef45726SSatish Balay           } else {
730fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
731fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
732fef45726SSatish Balay                 baij_a[jj]  = *value++;
733fef45726SSatish Balay               }
734b4cc0f5aSSatish Balay             }
7350bdbc534SSatish Balay           }
7360bdbc534SSatish Balay         }
7370bdbc534SSatish Balay       }
7380bdbc534SSatish Balay     } else {
7390bdbc534SSatish Balay       if (!baij->donotstash) {
7400bdbc534SSatish Balay         if (roworiented) {
7418798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7420bdbc534SSatish Balay         } else {
7438798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7440bdbc534SSatish Balay         }
7450bdbc534SSatish Balay       }
7460bdbc534SSatish Balay     }
7470bdbc534SSatish Balay   }
748aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
749187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
750187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
751187ce0cbSSatish Balay #endif
7520bdbc534SSatish Balay   PetscFunctionReturn(0);
7530bdbc534SSatish Balay }
754133cdb44SSatish Balay 
7550bdbc534SSatish Balay #undef __FUNC__
756*b0a32e0cSBarry Smith #define __FUNC__ "MatGetValues_MPIBAIJ"
757ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
758d6de1c52SSatish Balay {
759d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
760d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
76148e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
762d6de1c52SSatish Balay 
763133cdb44SSatish Balay   PetscFunctionBegin;
764d6de1c52SSatish Balay   for (i=0; i<m; i++) {
76529bbc08cSBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
766273d9f13SBarry Smith     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
767d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
768d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
769d6de1c52SSatish Balay       for (j=0; j<n; j++) {
77029bbc08cSBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
771273d9f13SBarry Smith         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
772d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
773d6de1c52SSatish Balay           col = idxn[j] - bscstart;
77498dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
775d64ed03dSBarry Smith         } else {
776905e6a2fSBarry Smith           if (!baij->colmap) {
777905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
778905e6a2fSBarry Smith           }
779aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7800f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
781fa46199cSSatish Balay           data --;
78248e59246SSatish Balay #else
78348e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
78448e59246SSatish Balay #endif
78548e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
786d9d09a02SSatish Balay           else {
78748e59246SSatish Balay             col  = data + idxn[j]%bs;
78898dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
789d6de1c52SSatish Balay           }
790d6de1c52SSatish Balay         }
791d6de1c52SSatish Balay       }
792d64ed03dSBarry Smith     } else {
79329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
794d6de1c52SSatish Balay     }
795d6de1c52SSatish Balay   }
7963a40ed3dSBarry Smith  PetscFunctionReturn(0);
797d6de1c52SSatish Balay }
798d6de1c52SSatish Balay 
7995615d1e5SSatish Balay #undef __FUNC__
800*b0a32e0cSBarry Smith #define __FUNC__ "MatNorm_MPIBAIJ"
801329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
802d6de1c52SSatish Balay {
803d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
804d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
805acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
806329f5518SBarry Smith   PetscReal  sum = 0.0;
8073eda8832SBarry Smith   MatScalar  *v;
808d6de1c52SSatish Balay 
809d64ed03dSBarry Smith   PetscFunctionBegin;
810d6de1c52SSatish Balay   if (baij->size == 1) {
811d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
812d6de1c52SSatish Balay   } else {
813d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
814d6de1c52SSatish Balay       v = amat->a;
815d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
816aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
817329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
818d6de1c52SSatish Balay #else
819d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
820d6de1c52SSatish Balay #endif
821d6de1c52SSatish Balay       }
822d6de1c52SSatish Balay       v = bmat->a;
823d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
824aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
825329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
826d6de1c52SSatish Balay #else
827d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
828d6de1c52SSatish Balay #endif
829d6de1c52SSatish Balay       }
830ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
831d6de1c52SSatish Balay       *norm = sqrt(*norm);
832d64ed03dSBarry Smith     } else {
83329bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
834d6de1c52SSatish Balay     }
835d64ed03dSBarry Smith   }
8363a40ed3dSBarry Smith   PetscFunctionReturn(0);
837d6de1c52SSatish Balay }
83857b952d6SSatish Balay 
839bd7f49f5SSatish Balay 
840fef45726SSatish Balay /*
841fef45726SSatish Balay   Creates the hash table, and sets the table
842fef45726SSatish Balay   This table is created only once.
843fef45726SSatish Balay   If new entried need to be added to the matrix
844fef45726SSatish Balay   then the hash table has to be destroyed and
845fef45726SSatish Balay   recreated.
846fef45726SSatish Balay */
847fef45726SSatish Balay #undef __FUNC__
848*b0a32e0cSBarry Smith #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
849329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850596b8d2eSBarry Smith {
851596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
852596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
853596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8540bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
855549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
856187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
857fef45726SSatish Balay   int         *HT,key;
8583eda8832SBarry Smith   MatScalar   **HD;
859329f5518SBarry Smith   PetscReal   tmp;
860aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8614a15367fSSatish Balay   int         ct=0,max=0;
8624a15367fSSatish Balay #endif
863fef45726SSatish Balay 
864d64ed03dSBarry Smith   PetscFunctionBegin;
8650bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8660bdbc534SSatish Balay   size = baij->ht_size;
867fef45726SSatish Balay 
8680bdbc534SSatish Balay   if (baij->ht) {
8690bdbc534SSatish Balay     PetscFunctionReturn(0);
870596b8d2eSBarry Smith   }
8710bdbc534SSatish Balay 
872fef45726SSatish Balay   /* Allocate Memory for Hash Table */
873*b0a32e0cSBarry Smith ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&  baij->hd );CHKERRQ(ierr);
874b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
875b9e4cc15SSatish Balay   HD = baij->hd;
876a07cd24cSSatish Balay   HT = baij->ht;
877b9e4cc15SSatish Balay 
878b9e4cc15SSatish Balay 
879549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
8800bdbc534SSatish Balay 
881596b8d2eSBarry Smith 
882596b8d2eSBarry Smith   /* Loop Over A */
8830bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
884596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8850bdbc534SSatish Balay       row = i+rstart;
8860bdbc534SSatish Balay       col = aj[j]+cstart;
887596b8d2eSBarry Smith 
888187ce0cbSSatish Balay       key = row*Nbs + col + 1;
889c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8900bdbc534SSatish Balay       for (k=0; k<size; k++){
8910bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8920bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8930bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
894596b8d2eSBarry Smith           break;
895aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
896187ce0cbSSatish Balay         } else {
897187ce0cbSSatish Balay           ct++;
898187ce0cbSSatish Balay #endif
899596b8d2eSBarry Smith         }
900187ce0cbSSatish Balay       }
901aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
902187ce0cbSSatish Balay       if (k> max) max = k;
903187ce0cbSSatish Balay #endif
904596b8d2eSBarry Smith     }
905596b8d2eSBarry Smith   }
906596b8d2eSBarry Smith   /* Loop Over B */
9070bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
908596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9090bdbc534SSatish Balay       row = i+rstart;
9100bdbc534SSatish Balay       col = garray[bj[j]];
911187ce0cbSSatish Balay       key = row*Nbs + col + 1;
912c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9130bdbc534SSatish Balay       for (k=0; k<size; k++){
9140bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
9150bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9160bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
917596b8d2eSBarry Smith           break;
918aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
919187ce0cbSSatish Balay         } else {
920187ce0cbSSatish Balay           ct++;
921187ce0cbSSatish Balay #endif
922596b8d2eSBarry Smith         }
923187ce0cbSSatish Balay       }
924aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
925187ce0cbSSatish Balay       if (k> max) max = k;
926187ce0cbSSatish Balay #endif
927596b8d2eSBarry Smith     }
928596b8d2eSBarry Smith   }
929596b8d2eSBarry Smith 
930596b8d2eSBarry Smith   /* Print Summary */
931aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
932c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
933596b8d2eSBarry Smith     if (HT[i]) {j++;}
934c38d4ed2SBarry Smith   }
935*b0a32e0cSBarry Smith   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
936187ce0cbSSatish Balay #endif
9373a40ed3dSBarry Smith   PetscFunctionReturn(0);
938596b8d2eSBarry Smith }
93957b952d6SSatish Balay 
940bbb85fb3SSatish Balay #undef __FUNC__
941*b0a32e0cSBarry Smith #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
942bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
943bbb85fb3SSatish Balay {
944bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
945ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
946bbb85fb3SSatish Balay   InsertMode  addv;
947bbb85fb3SSatish Balay 
948bbb85fb3SSatish Balay   PetscFunctionBegin;
949bbb85fb3SSatish Balay   if (baij->donotstash) {
950bbb85fb3SSatish Balay     PetscFunctionReturn(0);
951bbb85fb3SSatish Balay   }
952bbb85fb3SSatish Balay 
953bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
954bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
955bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
957bbb85fb3SSatish Balay   }
958bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
959bbb85fb3SSatish Balay 
9608798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9618798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9628798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
963*b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
9648798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
965*b0a32e0cSBarry Smith   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
966bbb85fb3SSatish Balay   PetscFunctionReturn(0);
967bbb85fb3SSatish Balay }
968bbb85fb3SSatish Balay 
969bbb85fb3SSatish Balay #undef __FUNC__
970*b0a32e0cSBarry Smith #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
971bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
972bbb85fb3SSatish Balay {
973bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
974a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
975a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9767c922b88SBarry Smith   int         *row,*col,other_disassembled;
9777c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9783eda8832SBarry Smith   MatScalar   *val;
979bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
980bbb85fb3SSatish Balay 
981bbb85fb3SSatish Balay   PetscFunctionBegin;
982bbb85fb3SSatish Balay   if (!baij->donotstash) {
983a2d1c673SSatish Balay     while (1) {
9848798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
985a2d1c673SSatish Balay       if (!flg) break;
986a2d1c673SSatish Balay 
987bbb85fb3SSatish Balay       for (i=0; i<n;) {
988bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
989bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
990bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
991bbb85fb3SSatish Balay         else       ncols = n-i;
992bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
994bbb85fb3SSatish Balay         i = j;
995bbb85fb3SSatish Balay       }
996bbb85fb3SSatish Balay     }
9978798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
998a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
999a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1000a2d1c673SSatish Balay        restore the original flags */
1001a2d1c673SSatish Balay     r1 = baij->roworiented;
1002a2d1c673SSatish Balay     r2 = a->roworiented;
1003a2d1c673SSatish Balay     r3 = b->roworiented;
10047c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10057c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10067c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1007a2d1c673SSatish Balay     while (1) {
10088798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1009a2d1c673SSatish Balay       if (!flg) break;
1010a2d1c673SSatish Balay 
1011a2d1c673SSatish Balay       for (i=0; i<n;) {
1012a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1013a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1014a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1015a2d1c673SSatish Balay         else       ncols = n-i;
101693fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1017a2d1c673SSatish Balay         i = j;
1018a2d1c673SSatish Balay       }
1019a2d1c673SSatish Balay     }
10208798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1021a2d1c673SSatish Balay     baij->roworiented = r1;
1022a2d1c673SSatish Balay     a->roworiented    = r2;
1023a2d1c673SSatish Balay     b->roworiented    = r3;
1024bbb85fb3SSatish Balay   }
1025bbb85fb3SSatish Balay 
1026bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1027bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1028bbb85fb3SSatish Balay 
1029bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1030bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1031bbb85fb3SSatish Balay   /*
1032bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1033bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1034bbb85fb3SSatish Balay   */
1035bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1036bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1037bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1038bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1039bbb85fb3SSatish Balay     }
1040bbb85fb3SSatish Balay   }
1041bbb85fb3SSatish Balay 
1042bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1043bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1044bbb85fb3SSatish Balay   }
1045bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1046bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1047bbb85fb3SSatish Balay 
1048aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1049bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1050*b0a32e0cSBarry Smith     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1051bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1052bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1053bbb85fb3SSatish Balay   }
1054bbb85fb3SSatish Balay #endif
1055bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1056bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1057bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1058bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1059bbb85fb3SSatish Balay   }
1060bbb85fb3SSatish Balay 
1061606d414cSSatish Balay   if (baij->rowvalues) {
1062606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1063606d414cSSatish Balay     baij->rowvalues = 0;
1064606d414cSSatish Balay   }
1065bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1066bbb85fb3SSatish Balay }
106757b952d6SSatish Balay 
10685615d1e5SSatish Balay #undef __FUNC__
1069*b0a32e0cSBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1070*b0a32e0cSBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107157b952d6SSatish Balay {
107257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
107377ed5343SBarry Smith   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
10746831982aSBarry Smith   PetscTruth   isascii,isdraw;
1075*b0a32e0cSBarry Smith   PetscViewer       sviewer;
107657b952d6SSatish Balay 
1077d64ed03dSBarry Smith   PetscFunctionBegin;
1078*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1079*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10800f5bd95cSBarry Smith   if (isascii) {
1081*b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1082*b0a32e0cSBarry Smith     if (format == PETSC_VIEWER_FORMAT_ASCII_INFO_LONG) {
10834e220ebcSLois Curfman McInnes       MatInfo info;
1084d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1085d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1086*b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1087273d9f13SBarry Smith               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10886831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1089d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1090*b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1091d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1092*b0a32e0cSBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1093*b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
109457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10953a40ed3dSBarry Smith       PetscFunctionReturn(0);
1096*b0a32e0cSBarry Smith     } else if (format == PETSC_VIEWER_FORMAT_ASCII_INFO) {
1097*b0a32e0cSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
10983a40ed3dSBarry Smith       PetscFunctionReturn(0);
109957b952d6SSatish Balay     }
110057b952d6SSatish Balay   }
110157b952d6SSatish Balay 
11020f5bd95cSBarry Smith   if (isdraw) {
1103*b0a32e0cSBarry Smith     PetscDraw       draw;
110457b952d6SSatish Balay     PetscTruth isnull;
1105*b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1106*b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
110757b952d6SSatish Balay   }
110857b952d6SSatish Balay 
110957b952d6SSatish Balay   if (size == 1) {
111057b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1111d64ed03dSBarry Smith   } else {
111257b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
111357b952d6SSatish Balay     Mat         A;
111457b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1115273d9f13SBarry Smith     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11163eda8832SBarry Smith     MatScalar   *a;
111757b952d6SSatish Balay 
111857b952d6SSatish Balay     if (!rank) {
111955843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1120d64ed03dSBarry Smith     } else {
112155843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
112257b952d6SSatish Balay     }
1123*b0a32e0cSBarry Smith     PetscLogObjectParent(mat,A);
112457b952d6SSatish Balay 
112557b952d6SSatish Balay     /* copy over the A part */
112657b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
112757b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1128*b0a32e0cSBarry Smith ierr = PetscMalloc(bs*sizeof(int),&(    rvals ));CHKERRQ(ierr);
112957b952d6SSatish Balay 
113057b952d6SSatish Balay     for (i=0; i<mbs; i++) {
113157b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
113257b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
113357b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
113457b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
113557b952d6SSatish Balay         for (k=0; k<bs; k++) {
113693fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1137cee3aa6bSSatish Balay           col++; a += bs;
113857b952d6SSatish Balay         }
113957b952d6SSatish Balay       }
114057b952d6SSatish Balay     }
114157b952d6SSatish Balay     /* copy over the B part */
114257b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
114357b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
114457b952d6SSatish Balay     for (i=0; i<mbs; i++) {
114557b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
114657b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
114757b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
114857b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
114957b952d6SSatish Balay         for (k=0; k<bs; k++) {
115093fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1151cee3aa6bSSatish Balay           col++; a += bs;
115257b952d6SSatish Balay         }
115357b952d6SSatish Balay       }
115457b952d6SSatish Balay     }
1155606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11566d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11576d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
115855843e3eSBarry Smith     /*
115955843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1160*b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
116155843e3eSBarry Smith     */
1162*b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1163f14a1c24SBarry Smith     if (!rank) {
11646831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
116557b952d6SSatish Balay     }
1166*b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
116757b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
116857b952d6SSatish Balay   }
11693a40ed3dSBarry Smith   PetscFunctionReturn(0);
117057b952d6SSatish Balay }
117157b952d6SSatish Balay 
11725615d1e5SSatish Balay #undef __FUNC__
1173*b0a32e0cSBarry Smith #define __FUNC__ "MatView_MPIBAIJ"
1174*b0a32e0cSBarry Smith int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
117557b952d6SSatish Balay {
117657b952d6SSatish Balay   int        ierr;
11776831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
117857b952d6SSatish Balay 
1179d64ed03dSBarry Smith   PetscFunctionBegin;
1180*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1181*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1182*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1183*b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
11840f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11857b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11865cd90555SBarry Smith   } else {
118729bbc08cSBarry Smith     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
118857b952d6SSatish Balay   }
11893a40ed3dSBarry Smith   PetscFunctionReturn(0);
119057b952d6SSatish Balay }
119157b952d6SSatish Balay 
11925615d1e5SSatish Balay #undef __FUNC__
1193*b0a32e0cSBarry Smith #define __FUNC__ "MatDestroy_MPIBAIJ"
1194e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
119579bdfe76SSatish Balay {
119679bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
119779bdfe76SSatish Balay   int         ierr;
119879bdfe76SSatish Balay 
1199d64ed03dSBarry Smith   PetscFunctionBegin;
1200aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1201*b0a32e0cSBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
120279bdfe76SSatish Balay #endif
12038798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12048798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1205606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
120679bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
120779bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1208aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12090f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
121048e59246SSatish Balay #else
1211606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
121248e59246SSatish Balay #endif
1213606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1214606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1215606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1216606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1217606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1218606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12196fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12206fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12216fa18ffdSBarry Smith #endif
1222606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
12233a40ed3dSBarry Smith   PetscFunctionReturn(0);
122479bdfe76SSatish Balay }
122579bdfe76SSatish Balay 
12265615d1e5SSatish Balay #undef __FUNC__
1227*b0a32e0cSBarry Smith #define __FUNC__ "MatMult_MPIBAIJ"
1228ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1229cee3aa6bSSatish Balay {
1230cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
123147b4a8eaSLois Curfman McInnes   int         ierr,nt;
1232cee3aa6bSSatish Balay 
1233d64ed03dSBarry Smith   PetscFunctionBegin;
1234e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1235273d9f13SBarry Smith   if (nt != A->n) {
123629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
123747b4a8eaSLois Curfman McInnes   }
1238e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1239273d9f13SBarry Smith   if (nt != A->m) {
124029bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
124147b4a8eaSLois Curfman McInnes   }
124243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1243f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
124443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1245f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
124643a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12473a40ed3dSBarry Smith   PetscFunctionReturn(0);
1248cee3aa6bSSatish Balay }
1249cee3aa6bSSatish Balay 
12505615d1e5SSatish Balay #undef __FUNC__
1251*b0a32e0cSBarry Smith #define __FUNC__ "MatMultAdd_MPIBAIJ"
1252ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1253cee3aa6bSSatish Balay {
1254cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1255cee3aa6bSSatish Balay   int        ierr;
1256d64ed03dSBarry Smith 
1257d64ed03dSBarry Smith   PetscFunctionBegin;
125843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1259f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
126043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1261f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1263cee3aa6bSSatish Balay }
1264cee3aa6bSSatish Balay 
12655615d1e5SSatish Balay #undef __FUNC__
1266*b0a32e0cSBarry Smith #define __FUNC__ "MatMultTranspose_MPIBAIJ"
12677c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1268cee3aa6bSSatish Balay {
1269cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1270cee3aa6bSSatish Balay   int         ierr;
1271cee3aa6bSSatish Balay 
1272d64ed03dSBarry Smith   PetscFunctionBegin;
1273cee3aa6bSSatish Balay   /* do nondiagonal part */
12747c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1275cee3aa6bSSatish Balay   /* send it on its way */
1276537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1277cee3aa6bSSatish Balay   /* do local part */
12787c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1279cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1280cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1281cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1282639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12833a40ed3dSBarry Smith   PetscFunctionReturn(0);
1284cee3aa6bSSatish Balay }
1285cee3aa6bSSatish Balay 
12865615d1e5SSatish Balay #undef __FUNC__
1287*b0a32e0cSBarry Smith #define __FUNC__ "MatMultTransposeAdd_MPIBAIJ"
12887c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1289cee3aa6bSSatish Balay {
1290cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1291cee3aa6bSSatish Balay   int         ierr;
1292cee3aa6bSSatish Balay 
1293d64ed03dSBarry Smith   PetscFunctionBegin;
1294cee3aa6bSSatish Balay   /* do nondiagonal part */
12957c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1296cee3aa6bSSatish Balay   /* send it on its way */
1297537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1298cee3aa6bSSatish Balay   /* do local part */
12997c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1300cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1301cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1302cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1303537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13043a40ed3dSBarry Smith   PetscFunctionReturn(0);
1305cee3aa6bSSatish Balay }
1306cee3aa6bSSatish Balay 
1307cee3aa6bSSatish Balay /*
1308cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1309cee3aa6bSSatish Balay    diagonal block
1310cee3aa6bSSatish Balay */
13115615d1e5SSatish Balay #undef __FUNC__
1312*b0a32e0cSBarry Smith #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1313ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1314cee3aa6bSSatish Balay {
1315cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13163a40ed3dSBarry Smith   int         ierr;
1317d64ed03dSBarry Smith 
1318d64ed03dSBarry Smith   PetscFunctionBegin;
1319273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13203a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1322cee3aa6bSSatish Balay }
1323cee3aa6bSSatish Balay 
13245615d1e5SSatish Balay #undef __FUNC__
1325*b0a32e0cSBarry Smith #define __FUNC__ "MatScale_MPIBAIJ"
1326ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1327cee3aa6bSSatish Balay {
1328cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1329cee3aa6bSSatish Balay   int         ierr;
1330d64ed03dSBarry Smith 
1331d64ed03dSBarry Smith   PetscFunctionBegin;
1332cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1333cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13343a40ed3dSBarry Smith   PetscFunctionReturn(0);
1335cee3aa6bSSatish Balay }
1336026e39d0SSatish Balay 
13375615d1e5SSatish Balay #undef __FUNC__
1338*b0a32e0cSBarry Smith #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1339ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
134057b952d6SSatish Balay {
134157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1342d64ed03dSBarry Smith 
1343d64ed03dSBarry Smith   PetscFunctionBegin;
1344273d9f13SBarry Smith   if (m) *m = mat->rstart_bs;
1345273d9f13SBarry Smith   if (n) *n = mat->rend_bs;
13463a40ed3dSBarry Smith   PetscFunctionReturn(0);
134757b952d6SSatish Balay }
134857b952d6SSatish Balay 
13495615d1e5SSatish Balay #undef __FUNC__
1350*b0a32e0cSBarry Smith #define __FUNC__ "MatGetRow_MPIBAIJ"
1351acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1352acdf5bf4SSatish Balay {
1353acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1354acdf5bf4SSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1355acdf5bf4SSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1356d9d09a02SSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1357d9d09a02SSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1358acdf5bf4SSatish Balay 
1359d64ed03dSBarry Smith   PetscFunctionBegin;
136029bbc08cSBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1361acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1362acdf5bf4SSatish Balay 
1363acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1364acdf5bf4SSatish Balay     /*
1365acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1366acdf5bf4SSatish Balay     */
1367acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1368bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1369bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1370acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1371acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1372acdf5bf4SSatish Balay     }
1373*b0a32e0cSBarry Smith ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&    mat->rowvalues );CHKERRQ(ierr);
1374acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1375acdf5bf4SSatish Balay   }
1376acdf5bf4SSatish Balay 
137729bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1378d9d09a02SSatish Balay   lrow = row - brstart;
1379acdf5bf4SSatish Balay 
1380acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1381acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1382acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1383f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1384f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1385acdf5bf4SSatish Balay   nztot = nzA + nzB;
1386acdf5bf4SSatish Balay 
1387acdf5bf4SSatish Balay   cmap  = mat->garray;
1388acdf5bf4SSatish Balay   if (v  || idx) {
1389acdf5bf4SSatish Balay     if (nztot) {
1390acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1391acdf5bf4SSatish Balay       int imark = -1;
1392acdf5bf4SSatish Balay       if (v) {
1393acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1394acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1395d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1396acdf5bf4SSatish Balay           else break;
1397acdf5bf4SSatish Balay         }
1398acdf5bf4SSatish Balay         imark = i;
1399acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1400acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1401acdf5bf4SSatish Balay       }
1402acdf5bf4SSatish Balay       if (idx) {
1403acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1404acdf5bf4SSatish Balay         if (imark > -1) {
1405acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1406bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1407acdf5bf4SSatish Balay           }
1408acdf5bf4SSatish Balay         } else {
1409acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1410d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1411d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1412acdf5bf4SSatish Balay             else break;
1413acdf5bf4SSatish Balay           }
1414acdf5bf4SSatish Balay           imark = i;
1415acdf5bf4SSatish Balay         }
1416d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1417d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1418acdf5bf4SSatish Balay       }
1419d64ed03dSBarry Smith     } else {
1420d212a18eSSatish Balay       if (idx) *idx = 0;
1421d212a18eSSatish Balay       if (v)   *v   = 0;
1422d212a18eSSatish Balay     }
1423acdf5bf4SSatish Balay   }
1424acdf5bf4SSatish Balay   *nz = nztot;
1425f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1426f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14273a40ed3dSBarry Smith   PetscFunctionReturn(0);
1428acdf5bf4SSatish Balay }
1429acdf5bf4SSatish Balay 
14305615d1e5SSatish Balay #undef __FUNC__
1431*b0a32e0cSBarry Smith #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1432acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1433acdf5bf4SSatish Balay {
1434acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1435d64ed03dSBarry Smith 
1436d64ed03dSBarry Smith   PetscFunctionBegin;
1437acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
143829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1439acdf5bf4SSatish Balay   }
1440acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14413a40ed3dSBarry Smith   PetscFunctionReturn(0);
1442acdf5bf4SSatish Balay }
1443acdf5bf4SSatish Balay 
14445615d1e5SSatish Balay #undef __FUNC__
1445*b0a32e0cSBarry Smith #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1446ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14475a838052SSatish Balay {
14485a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1449d64ed03dSBarry Smith 
1450d64ed03dSBarry Smith   PetscFunctionBegin;
14515a838052SSatish Balay   *bs = baij->bs;
14523a40ed3dSBarry Smith   PetscFunctionReturn(0);
14535a838052SSatish Balay }
14545a838052SSatish Balay 
14555615d1e5SSatish Balay #undef __FUNC__
1456*b0a32e0cSBarry Smith #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1457ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
145858667388SSatish Balay {
145958667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
146058667388SSatish Balay   int         ierr;
1461d64ed03dSBarry Smith 
1462d64ed03dSBarry Smith   PetscFunctionBegin;
146358667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
146458667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14653a40ed3dSBarry Smith   PetscFunctionReturn(0);
146658667388SSatish Balay }
14670ac07820SSatish Balay 
14685615d1e5SSatish Balay #undef __FUNC__
1469*b0a32e0cSBarry Smith #define __FUNC__ "MatGetInfo_MPIBAIJ"
1470ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14710ac07820SSatish Balay {
14724e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14734e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14747d57db60SLois Curfman McInnes   int         ierr;
1475329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14760ac07820SSatish Balay 
1477d64ed03dSBarry Smith   PetscFunctionBegin;
14784e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14794e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14800e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1481de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14824e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14830e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1484de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14850ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14864e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14874e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14884e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14894e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14904e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14910ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1492f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14934e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14944e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14954e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14964e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14974e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14980ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1499f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
15004e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15014e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15024e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15034e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15044e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1505d41123aaSBarry Smith   } else {
150629bbc08cSBarry Smith     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
15070ac07820SSatish Balay   }
1508273d9f13SBarry Smith   info->rows_global       = (double)A->M;
1509273d9f13SBarry Smith   info->columns_global    = (double)A->N;
1510273d9f13SBarry Smith   info->rows_local        = (double)A->m;
1511273d9f13SBarry Smith   info->columns_local     = (double)A->N;
15124e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15134e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15144e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15153a40ed3dSBarry Smith   PetscFunctionReturn(0);
15160ac07820SSatish Balay }
15170ac07820SSatish Balay 
15185615d1e5SSatish Balay #undef __FUNC__
1519*b0a32e0cSBarry Smith #define __FUNC__ "MatSetOption_MPIBAIJ"
1520ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
152158667388SSatish Balay {
152258667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
152398305bb5SBarry Smith   int         ierr;
152458667388SSatish Balay 
1525d64ed03dSBarry Smith   PetscFunctionBegin;
152658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
152758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
15286da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1529c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
15304787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
15317c922b88SBarry Smith       op == MAT_KEEP_ZEROED_ROWS ||
15324787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
153398305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153498305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1535b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
15367c922b88SBarry Smith         a->roworiented = PETSC_TRUE;
153798305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
153898305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1539b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
15406da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
154158667388SSatish Balay              op == MAT_SYMMETRIC ||
154258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1543b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
154498305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
1545*b0a32e0cSBarry Smith     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
154698305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
15477c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
154898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
15502b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
15517c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
1552d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
155329bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1554133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
15557c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
1556d64ed03dSBarry Smith   } else {
155729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1558d64ed03dSBarry Smith   }
15593a40ed3dSBarry Smith   PetscFunctionReturn(0);
156058667388SSatish Balay }
156158667388SSatish Balay 
15625615d1e5SSatish Balay #undef __FUNC__
1563*b0a32e0cSBarry Smith #define __FUNC__ "MatTranspose_MPIBAIJ("
1564ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15650ac07820SSatish Balay {
15660ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15670ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15680ac07820SSatish Balay   Mat         B;
1569273d9f13SBarry Smith   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
15700ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15713eda8832SBarry Smith   MatScalar   *a;
15720ac07820SSatish Balay 
1573d64ed03dSBarry Smith   PetscFunctionBegin;
157429bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575273d9f13SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15760ac07820SSatish Balay 
15770ac07820SSatish Balay   /* copy over the A part */
15780ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15790ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1580*b0a32e0cSBarry Smith ierr = PetscMalloc(bs*sizeof(int),&(  rvals ));CHKERRQ(ierr);
15810ac07820SSatish Balay 
15820ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15830ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15840ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15850ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15860ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15870ac07820SSatish Balay       for (k=0; k<bs; k++) {
158893fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15890ac07820SSatish Balay         col++; a += bs;
15900ac07820SSatish Balay       }
15910ac07820SSatish Balay     }
15920ac07820SSatish Balay   }
15930ac07820SSatish Balay   /* copy over the B part */
15940ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
15950ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15960ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15970ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15980ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15990ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16000ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16010ac07820SSatish Balay       for (k=0; k<bs; k++) {
160293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16030ac07820SSatish Balay         col++; a += bs;
16040ac07820SSatish Balay       }
16050ac07820SSatish Balay     }
16060ac07820SSatish Balay   }
1607606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16080ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16090ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16100ac07820SSatish Balay 
16117c922b88SBarry Smith   if (matout) {
16120ac07820SSatish Balay     *matout = B;
16130ac07820SSatish Balay   } else {
1614273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16150ac07820SSatish Balay   }
16163a40ed3dSBarry Smith   PetscFunctionReturn(0);
16170ac07820SSatish Balay }
16180e95ebc0SSatish Balay 
16195615d1e5SSatish Balay #undef __FUNC__
1620*b0a32e0cSBarry Smith #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
162136c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16220e95ebc0SSatish Balay {
162336c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
162436c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16250e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16260e95ebc0SSatish Balay 
1627d64ed03dSBarry Smith   PetscFunctionBegin;
162836c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
162936c4a09eSSatish Balay   if (rr) {
163036c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
163129bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
163236c4a09eSSatish Balay     /* Overlap communication with computation. */
163336c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
163436c4a09eSSatish Balay   }
16350e95ebc0SSatish Balay   if (ll) {
16360e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
163729bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1638a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16390e95ebc0SSatish Balay   }
164036c4a09eSSatish Balay   /* scale  the diagonal block */
164136c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
164236c4a09eSSatish Balay 
164336c4a09eSSatish Balay   if (rr) {
164436c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
164536c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1646a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
164736c4a09eSSatish Balay   }
164836c4a09eSSatish Balay 
16493a40ed3dSBarry Smith   PetscFunctionReturn(0);
16500e95ebc0SSatish Balay }
16510e95ebc0SSatish Balay 
16525615d1e5SSatish Balay #undef __FUNC__
1653*b0a32e0cSBarry Smith #define __FUNC__ "MatZeroRows_MPIBAIJ"
1654ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16550ac07820SSatish Balay {
16560ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16570ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
165835d8aa7fSBarry Smith   int            *procs,*nprocs,j,idx,nsends,*work,row;
16590ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16600ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1661a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16620ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16630ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16640ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16650ac07820SSatish Balay   IS             istmp;
166635d8aa7fSBarry Smith   PetscTruth     found;
16670ac07820SSatish Balay 
1668d64ed03dSBarry Smith   PetscFunctionBegin;
1669f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
16700ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
16710ac07820SSatish Balay 
16720ac07820SSatish Balay   /*  first count number of contributors to each processor */
1673*b0a32e0cSBarry Smith ierr = PetscMalloc(2*size*sizeof(int),&(  nprocs ));CHKERRQ(ierr);
1674549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1675549d3d68SSatish Balay   procs  = nprocs + size;
1676*b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&  owner  );CHKERRQ(ierr); /* see note*/
16770ac07820SSatish Balay   for (i=0; i<N; i++) {
16780ac07820SSatish Balay     idx   = rows[i];
167935d8aa7fSBarry Smith     found = PETSC_FALSE;
16800ac07820SSatish Balay     for (j=0; j<size; j++) {
16810ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
168235d8aa7fSBarry Smith         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
16830ac07820SSatish Balay       }
16840ac07820SSatish Balay     }
168529bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
16860ac07820SSatish Balay   }
16870ac07820SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
16880ac07820SSatish Balay 
16890ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1690*b0a32e0cSBarry Smith ierr = PetscMalloc(2*size*sizeof(int),&(  work   ));CHKERRQ(ierr);
16916831982aSBarry Smith   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
16920ac07820SSatish Balay   nmax   = work[rank];
16936831982aSBarry Smith   nrecvs = work[size+rank];
1694606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
16950ac07820SSatish Balay 
16960ac07820SSatish Balay   /* post receives:   */
1697*b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&  rvalues );CHKERRQ(ierr);
1698*b0a32e0cSBarry Smith ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&  recv_waits );CHKERRQ(ierr);
16990ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1700ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17010ac07820SSatish Balay   }
17020ac07820SSatish Balay 
17030ac07820SSatish Balay   /* do sends:
17040ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17050ac07820SSatish Balay      the ith processor
17060ac07820SSatish Balay   */
1707*b0a32e0cSBarry Smith ierr = PetscMalloc((N+1)*sizeof(int),&  svalues    );CHKERRQ(ierr);
1708*b0a32e0cSBarry Smith ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&  send_waits );CHKERRQ(ierr);
1709*b0a32e0cSBarry Smith ierr = PetscMalloc((size+1)*sizeof(int),&  starts     );CHKERRQ(ierr);
17100ac07820SSatish Balay   starts[0]  = 0;
17110ac07820SSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17120ac07820SSatish Balay   for (i=0; i<N; i++) {
17130ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17140ac07820SSatish Balay   }
17156831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17160ac07820SSatish Balay 
17170ac07820SSatish Balay   starts[0] = 0;
17180ac07820SSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17190ac07820SSatish Balay   count = 0;
17200ac07820SSatish Balay   for (i=0; i<size; i++) {
17210ac07820SSatish Balay     if (procs[i]) {
1722ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17230ac07820SSatish Balay     }
17240ac07820SSatish Balay   }
1725606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17260ac07820SSatish Balay 
17270ac07820SSatish Balay   base = owners[rank]*bs;
17280ac07820SSatish Balay 
17290ac07820SSatish Balay   /*  wait on receives */
1730*b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&  lens   );CHKERRQ(ierr);
17310ac07820SSatish Balay   source = lens + nrecvs;
17320ac07820SSatish Balay   count  = nrecvs; slen = 0;
17330ac07820SSatish Balay   while (count) {
1734ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17350ac07820SSatish Balay     /* unpack receives into our local space */
1736ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17370ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17380ac07820SSatish Balay     lens[imdex]    = n;
17390ac07820SSatish Balay     slen          += n;
17400ac07820SSatish Balay     count--;
17410ac07820SSatish Balay   }
1742606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17430ac07820SSatish Balay 
17440ac07820SSatish Balay   /* move the data into the send scatter */
1745*b0a32e0cSBarry Smith ierr = PetscMalloc((slen+1)*sizeof(int),&  lrows );CHKERRQ(ierr);
17460ac07820SSatish Balay   count = 0;
17470ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17480ac07820SSatish Balay     values = rvalues + i*nmax;
17490ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17500ac07820SSatish Balay       lrows[count++] = values[j] - base;
17510ac07820SSatish Balay     }
17520ac07820SSatish Balay   }
1753606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1754606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1755606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1756606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17570ac07820SSatish Balay 
17580ac07820SSatish Balay   /* actually zap the local rows */
1759029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1760*b0a32e0cSBarry Smith   PetscLogObjectParent(A,istmp);
1761a07cd24cSSatish Balay 
176272dacd9aSBarry Smith   /*
176372dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
176472dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
176572dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
176672dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
176772dacd9aSBarry Smith 
176872dacd9aSBarry Smith        Contributed by: Mathew Knepley
176972dacd9aSBarry Smith   */
17709c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17716fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
17729c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17736fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
17749c957beeSSatish Balay   } else if (diag) {
17756fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1776fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
177729bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1778fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
17796525c446SSatish Balay     }
1780a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1781a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
17823f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1783a07cd24cSSatish Balay     }
1784a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1785a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17869c957beeSSatish Balay   } else {
17876fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1788a07cd24cSSatish Balay   }
17899c957beeSSatish Balay 
17909c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1791606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1792a07cd24cSSatish Balay 
17930ac07820SSatish Balay   /* wait on sends */
17940ac07820SSatish Balay   if (nsends) {
1795*b0a32e0cSBarry Smith ierr = PetscMalloc(nsends*sizeof(MPI_Status),&(    send_status ));CHKERRQ(ierr);
1796ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1797606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
17980ac07820SSatish Balay   }
1799606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1800606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18010ac07820SSatish Balay 
18023a40ed3dSBarry Smith   PetscFunctionReturn(0);
18030ac07820SSatish Balay }
180472dacd9aSBarry Smith 
18055615d1e5SSatish Balay #undef __FUNC__
1806*b0a32e0cSBarry Smith #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1807ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1808ba4ca20aSSatish Balay {
1809ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
181025fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1811133cdb44SSatish Balay   static int  called = 0;
18123a40ed3dSBarry Smith   int         ierr;
1813ba4ca20aSSatish Balay 
1814d64ed03dSBarry Smith   PetscFunctionBegin;
18153a40ed3dSBarry Smith   if (!a->rank) {
18163a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
181725fdafccSSatish Balay   }
181825fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1819d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1820d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1822ba4ca20aSSatish Balay }
18230ac07820SSatish Balay 
18245615d1e5SSatish Balay #undef __FUNC__
1825*b0a32e0cSBarry Smith #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1826ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1827bb5a7306SBarry Smith {
1828bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1829bb5a7306SBarry Smith   int         ierr;
1830d64ed03dSBarry Smith 
1831d64ed03dSBarry Smith   PetscFunctionBegin;
1832bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18333a40ed3dSBarry Smith   PetscFunctionReturn(0);
1834bb5a7306SBarry Smith }
1835bb5a7306SBarry Smith 
18362e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18370ac07820SSatish Balay 
18387fc3c18eSBarry Smith #undef __FUNC__
1839*b0a32e0cSBarry Smith #define __FUNC__ "MatEqual_MPIBAIJ"
18407fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18417fc3c18eSBarry Smith {
18427fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18437fc3c18eSBarry Smith   Mat         a,b,c,d;
18447fc3c18eSBarry Smith   PetscTruth  flg;
18457fc3c18eSBarry Smith   int         ierr;
18467fc3c18eSBarry Smith 
18477fc3c18eSBarry Smith   PetscFunctionBegin;
1848273d9f13SBarry Smith   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
1849273d9f13SBarry Smith   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
18507fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18517fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18527fc3c18eSBarry Smith 
18537fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18547fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18557fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18567fc3c18eSBarry Smith   }
18577fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18587fc3c18eSBarry Smith   PetscFunctionReturn(0);
18597fc3c18eSBarry Smith }
18607fc3c18eSBarry Smith 
1861273d9f13SBarry Smith 
1862273d9f13SBarry Smith #undef __FUNC__
1863*b0a32e0cSBarry Smith #define __FUNC__ "MatSetUpPreallocation_MPIBAIJ"
1864273d9f13SBarry Smith int MatSetUpPreallocation_MPIBAIJ(Mat A)
1865273d9f13SBarry Smith {
1866273d9f13SBarry Smith   int        ierr;
1867273d9f13SBarry Smith 
1868273d9f13SBarry Smith   PetscFunctionBegin;
1869273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1870273d9f13SBarry Smith   PetscFunctionReturn(0);
1871273d9f13SBarry Smith }
1872273d9f13SBarry Smith 
187379bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1874cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1875cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1876cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1877cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1878cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1879cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
18807c922b88SBarry Smith   MatMultTranspose_MPIBAIJ,
18817c922b88SBarry Smith   MatMultTransposeAdd_MPIBAIJ,
1882cc2dc46cSBarry Smith   0,
1883cc2dc46cSBarry Smith   0,
1884cc2dc46cSBarry Smith   0,
1885cc2dc46cSBarry Smith   0,
1886cc2dc46cSBarry Smith   0,
1887cc2dc46cSBarry Smith   0,
1888cc2dc46cSBarry Smith   0,
1889cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1890cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
18917fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1892cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1893cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1894cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1895cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1896cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1897cc2dc46cSBarry Smith   0,
1898cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1899cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1900cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1901cc2dc46cSBarry Smith   0,
1902cc2dc46cSBarry Smith   0,
1903cc2dc46cSBarry Smith   0,
1904cc2dc46cSBarry Smith   0,
1905273d9f13SBarry Smith   MatSetUpPreallocation_MPIBAIJ,
1906273d9f13SBarry Smith   0,
1907cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1908cc2dc46cSBarry Smith   0,
1909cc2dc46cSBarry Smith   0,
1910cc2dc46cSBarry Smith   0,
1911cc2dc46cSBarry Smith   0,
19122e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1913cc2dc46cSBarry Smith   0,
1914cc2dc46cSBarry Smith   0,
1915cc2dc46cSBarry Smith   0,
1916cc2dc46cSBarry Smith   0,
1917cc2dc46cSBarry Smith   0,
1918cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1919cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1920cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1921cc2dc46cSBarry Smith   0,
1922cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1923cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1924cc2dc46cSBarry Smith   0,
1925cc2dc46cSBarry Smith   0,
1926cc2dc46cSBarry Smith   0,
1927cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1928cc2dc46cSBarry Smith   0,
1929cc2dc46cSBarry Smith   0,
1930cc2dc46cSBarry Smith   0,
1931cc2dc46cSBarry Smith   0,
1932cc2dc46cSBarry Smith   0,
1933cc2dc46cSBarry Smith   0,
1934cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1935cc2dc46cSBarry Smith   0,
1936cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1937cc2dc46cSBarry Smith   0,
1938f14a1c24SBarry Smith   MatDestroy_MPIBAIJ,
1939f14a1c24SBarry Smith   MatView_MPIBAIJ,
19407843d17aSBarry Smith   MatGetMaps_Petsc,
19417843d17aSBarry Smith   0,
19427843d17aSBarry Smith   0,
19437843d17aSBarry Smith   0,
19447843d17aSBarry Smith   0,
19457843d17aSBarry Smith   0,
19467843d17aSBarry Smith   0,
19477843d17aSBarry Smith   MatGetRowMax_MPIBAIJ};
194879bdfe76SSatish Balay 
19495ef9f2a5SBarry Smith 
1950e18c124aSSatish Balay EXTERN_C_BEGIN
19515ef9f2a5SBarry Smith #undef __FUNC__
1952*b0a32e0cSBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
19535ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19545ef9f2a5SBarry Smith {
19555ef9f2a5SBarry Smith   PetscFunctionBegin;
19565ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19575ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19585ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19595ef9f2a5SBarry Smith }
1960e18c124aSSatish Balay EXTERN_C_END
196179bdfe76SSatish Balay 
1962273d9f13SBarry Smith EXTERN_C_BEGIN
1963273d9f13SBarry Smith #undef __FUNC__
1964*b0a32e0cSBarry Smith #define __FUNC__ "MatCreate_MPIBAIJ"
1965273d9f13SBarry Smith int MatCreate_MPIBAIJ(Mat B)
1966273d9f13SBarry Smith {
1967273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
1968273d9f13SBarry Smith   int          ierr,size;
1969273d9f13SBarry Smith   PetscTruth   flg;
1970273d9f13SBarry Smith 
1971273d9f13SBarry Smith   PetscFunctionBegin;
1972273d9f13SBarry Smith 
1973273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1974*b0a32e0cSBarry Smith   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKERRQ(ierr);
1975273d9f13SBarry Smith   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
1976273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1977273d9f13SBarry Smith   B->mapping    = 0;
1978273d9f13SBarry Smith   B->factor     = 0;
1979273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
1980273d9f13SBarry Smith 
1981273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
1982273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1983273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1984273d9f13SBarry Smith 
1985273d9f13SBarry Smith   /* build local table of row and column ownerships */
1986*b0a32e0cSBarry Smith   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKERRQ(ierr);
1987*b0a32e0cSBarry Smith   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1988273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
1989273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
1990273d9f13SBarry Smith 
1991273d9f13SBarry Smith   /* build cache for off array entries formed */
1992273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1993273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
1994273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
1995273d9f13SBarry Smith   b->garray      = PETSC_NULL;
1996273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
1997273d9f13SBarry Smith 
1998273d9f13SBarry Smith #if defined(PEYSC_USE_MAT_SINGLE)
1999273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
2000273d9f13SBarry Smith   b->lensetvalues     = 0;
2001273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2002273d9f13SBarry Smith #endif
2003273d9f13SBarry Smith 
2004273d9f13SBarry Smith   /* stuff used in block assembly */
2005273d9f13SBarry Smith   b->barray       = 0;
2006273d9f13SBarry Smith 
2007273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2008273d9f13SBarry Smith   b->lvec         = 0;
2009273d9f13SBarry Smith   b->Mvctx        = 0;
2010273d9f13SBarry Smith 
2011273d9f13SBarry Smith   /* stuff for MatGetRow() */
2012273d9f13SBarry Smith   b->rowindices   = 0;
2013273d9f13SBarry Smith   b->rowvalues    = 0;
2014273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2015273d9f13SBarry Smith 
2016273d9f13SBarry Smith   /* hash table stuff */
2017273d9f13SBarry Smith   b->ht           = 0;
2018273d9f13SBarry Smith   b->hd           = 0;
2019273d9f13SBarry Smith   b->ht_size      = 0;
2020273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2021273d9f13SBarry Smith   b->ht_fact      = 0;
2022273d9f13SBarry Smith   b->ht_total_ct  = 0;
2023273d9f13SBarry Smith   b->ht_insert_ct = 0;
2024273d9f13SBarry Smith 
2025*b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2026273d9f13SBarry Smith   if (flg) {
2027273d9f13SBarry Smith     double fact = 1.39;
2028273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2029*b0a32e0cSBarry Smith     ierr = PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2030273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2031273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2032*b0a32e0cSBarry Smith     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2033273d9f13SBarry Smith   }
2034273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2035273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2036273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2037273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2038273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2039273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2040273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2041273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2042273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2043273d9f13SBarry Smith   PetscFunctionReturn(0);
2044273d9f13SBarry Smith }
2045273d9f13SBarry Smith EXTERN_C_END
2046273d9f13SBarry Smith 
2047273d9f13SBarry Smith #undef __FUNC__
2048*b0a32e0cSBarry Smith #define __FUNC__ "MatMPIBAIJSetPreallocation"
2049273d9f13SBarry Smith /*@C
2050273d9f13SBarry Smith    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2051273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2052273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2053273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2054273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2055273d9f13SBarry Smith 
2056273d9f13SBarry Smith    Collective on Mat
2057273d9f13SBarry Smith 
2058273d9f13SBarry Smith    Input Parameters:
2059273d9f13SBarry Smith +  A - the matrix
2060273d9f13SBarry Smith .  bs   - size of blockk
2061273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2062273d9f13SBarry Smith            submatrix  (same for all local rows)
2063273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2064273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2065273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2066273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2067273d9f13SBarry Smith            submatrix (same for all local rows).
2068273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2069273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2070273d9f13SBarry Smith            each block row) or PETSC_NULL.
2071273d9f13SBarry Smith 
2072273d9f13SBarry Smith    Output Parameter:
2073273d9f13SBarry Smith 
2074273d9f13SBarry Smith 
2075273d9f13SBarry Smith    Options Database Keys:
2076273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2077273d9f13SBarry Smith                      block calculations (much slower)
2078273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2079273d9f13SBarry Smith 
2080273d9f13SBarry Smith    Notes:
2081273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2082273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2083273d9f13SBarry Smith 
2084273d9f13SBarry Smith    Storage Information:
2085273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2086273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2087273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2088273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2089273d9f13SBarry Smith 
2090273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2091273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2092273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2093273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2094273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2095273d9f13SBarry Smith 
2096273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2097273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2098273d9f13SBarry Smith 
2099273d9f13SBarry Smith .vb
2100273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2101273d9f13SBarry Smith           -------------------
2102273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2103273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2104273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2105273d9f13SBarry Smith           -------------------
2106273d9f13SBarry Smith .ve
2107273d9f13SBarry Smith 
2108273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2109273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2110273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2111273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2112273d9f13SBarry Smith 
2113273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2114273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2115273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2116273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2117273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2118273d9f13SBarry Smith    matrices.
2119273d9f13SBarry Smith 
2120273d9f13SBarry Smith    Level: intermediate
2121273d9f13SBarry Smith 
2122273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2123273d9f13SBarry Smith 
2124273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2125273d9f13SBarry Smith @*/
2126273d9f13SBarry Smith int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2127273d9f13SBarry Smith {
2128273d9f13SBarry Smith   Mat_MPIBAIJ  *b;
2129273d9f13SBarry Smith   int          ierr,i;
2130273d9f13SBarry Smith   PetscTruth   flg2;
2131273d9f13SBarry Smith 
2132273d9f13SBarry Smith   PetscFunctionBegin;
2133273d9f13SBarry Smith   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
2134273d9f13SBarry Smith   if (!flg2) PetscFunctionReturn(0);
213547a75d0bSBarry Smith 
2136273d9f13SBarry Smith   B->preallocated = PETSC_TRUE;
2137*b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2138273d9f13SBarry Smith 
2139273d9f13SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2140273d9f13SBarry Smith   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz);
2141273d9f13SBarry Smith   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz);
2142273d9f13SBarry Smith   if (d_nnz) {
2143273d9f13SBarry Smith   for (i=0; i<B->m/bs; i++) {
2144273d9f13SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2145273d9f13SBarry Smith     }
2146273d9f13SBarry Smith   }
2147273d9f13SBarry Smith   if (o_nnz) {
2148273d9f13SBarry Smith     for (i=0; i<B->m/bs; i++) {
2149273d9f13SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2150273d9f13SBarry Smith     }
2151273d9f13SBarry Smith   }
2152273d9f13SBarry Smith 
215347a75d0bSBarry Smith   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
215447a75d0bSBarry Smith   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
215547a75d0bSBarry Smith   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
215647a75d0bSBarry Smith   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
215747a75d0bSBarry Smith 
2158273d9f13SBarry Smith   b = (Mat_MPIBAIJ*)B->data;
2159273d9f13SBarry Smith   b->bs  = bs;
2160273d9f13SBarry Smith   b->bs2 = bs*bs;
2161273d9f13SBarry Smith   b->mbs = B->m/bs;
2162273d9f13SBarry Smith   b->nbs = B->n/bs;
2163273d9f13SBarry Smith   b->Mbs = B->M/bs;
2164273d9f13SBarry Smith   b->Nbs = B->N/bs;
2165273d9f13SBarry Smith 
2166273d9f13SBarry Smith   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2167273d9f13SBarry Smith   b->rowners[0]    = 0;
2168273d9f13SBarry Smith   for (i=2; i<=b->size; i++) {
2169273d9f13SBarry Smith     b->rowners[i] += b->rowners[i-1];
2170273d9f13SBarry Smith   }
2171273d9f13SBarry Smith   b->rstart    = b->rowners[b->rank];
2172273d9f13SBarry Smith   b->rend      = b->rowners[b->rank+1];
2173273d9f13SBarry Smith 
2174273d9f13SBarry Smith   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2175273d9f13SBarry Smith   b->cowners[0] = 0;
2176273d9f13SBarry Smith   for (i=2; i<=b->size; i++) {
2177273d9f13SBarry Smith     b->cowners[i] += b->cowners[i-1];
2178273d9f13SBarry Smith   }
2179273d9f13SBarry Smith   b->cstart    = b->cowners[b->rank];
2180273d9f13SBarry Smith   b->cend      = b->cowners[b->rank+1];
2181273d9f13SBarry Smith 
2182273d9f13SBarry Smith   for (i=0; i<=b->size; i++) {
2183273d9f13SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2184273d9f13SBarry Smith   }
2185273d9f13SBarry Smith   b->rstart_bs = b->rstart*bs;
2186273d9f13SBarry Smith   b->rend_bs   = b->rend*bs;
2187273d9f13SBarry Smith   b->cstart_bs = b->cstart*bs;
2188273d9f13SBarry Smith   b->cend_bs   = b->cend*bs;
2189273d9f13SBarry Smith 
2190273d9f13SBarry Smith   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2191273d9f13SBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2192*b0a32e0cSBarry Smith   PetscLogObjectParent(B,b->A);
2193273d9f13SBarry Smith   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2194273d9f13SBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2195*b0a32e0cSBarry Smith   PetscLogObjectParent(B,b->B);
2196273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2197273d9f13SBarry Smith 
2198273d9f13SBarry Smith   PetscFunctionReturn(0);
2199273d9f13SBarry Smith }
2200273d9f13SBarry Smith 
22015615d1e5SSatish Balay #undef __FUNC__
2202*b0a32e0cSBarry Smith #define __FUNC__ "MatCreateMPIBAIJ"
220379bdfe76SSatish Balay /*@C
220479bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
220579bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
220679bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
220779bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
220879bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
220979bdfe76SSatish Balay 
2210db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2211db81eaa0SLois Curfman McInnes 
221279bdfe76SSatish Balay    Input Parameters:
2213db81eaa0SLois Curfman McInnes +  comm - MPI communicator
221479bdfe76SSatish Balay .  bs   - size of blockk
221579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
221692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
221792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
221892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
221992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
222092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2221be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2222be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
222347a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
222479bdfe76SSatish Balay            submatrix  (same for all local rows)
222547a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
222692e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2227db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
222847a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
222979bdfe76SSatish Balay            submatrix (same for all local rows).
223047a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
223192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
223292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
223379bdfe76SSatish Balay 
223479bdfe76SSatish Balay    Output Parameter:
223579bdfe76SSatish Balay .  A - the matrix
223679bdfe76SSatish Balay 
2237db81eaa0SLois Curfman McInnes    Options Database Keys:
2238db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2239db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2240db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
22413ffaccefSLois Curfman McInnes 
2242b259b22eSLois Curfman McInnes    Notes:
224347a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
224447a75d0bSBarry Smith 
224579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
224679bdfe76SSatish Balay    (possibly both).
224779bdfe76SSatish Balay 
2248be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2249be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2250be79a94dSBarry Smith 
225179bdfe76SSatish Balay    Storage Information:
225279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
225379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
225479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
225579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
225679bdfe76SSatish Balay 
225779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
225879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
225979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
226079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
226179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
226279bdfe76SSatish Balay 
226379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
226479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
226579bdfe76SSatish Balay 
2266db81eaa0SLois Curfman McInnes .vb
2267db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2268db81eaa0SLois Curfman McInnes           -------------------
2269db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2270db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2271db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2272db81eaa0SLois Curfman McInnes           -------------------
2273db81eaa0SLois Curfman McInnes .ve
227479bdfe76SSatish Balay 
227579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
227679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
227779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
227857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
227979bdfe76SSatish Balay 
2280d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2281d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
228279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
228392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
228492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
22856da5968aSLois Curfman McInnes    matrices.
228679bdfe76SSatish Balay 
2287027ccd11SLois Curfman McInnes    Level: intermediate
2288027ccd11SLois Curfman McInnes 
228992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
229079bdfe76SSatish Balay 
22913eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
229279bdfe76SSatish Balay @*/
2293329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
229479bdfe76SSatish Balay {
2295273d9f13SBarry Smith   int ierr,size;
229679bdfe76SSatish Balay 
2297d64ed03dSBarry Smith   PetscFunctionBegin;
2298273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2299d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2300273d9f13SBarry Smith   if (size > 1) {
2301273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2302273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2303273d9f13SBarry Smith   } else {
2304273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2305273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
23063914022bSBarry Smith   }
23073a40ed3dSBarry Smith   PetscFunctionReturn(0);
230879bdfe76SSatish Balay }
2309026e39d0SSatish Balay 
23105615d1e5SSatish Balay #undef __FUNC__
2311*b0a32e0cSBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
23122e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
23130ac07820SSatish Balay {
23140ac07820SSatish Balay   Mat         mat;
23150ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2316f1af5d2fSBarry Smith   int         ierr,len=0;
23170ac07820SSatish Balay 
2318d64ed03dSBarry Smith   PetscFunctionBegin;
23190ac07820SSatish Balay   *newmat       = 0;
2320273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2321273d9f13SBarry Smith   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2322273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
23230ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2324273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
23250ac07820SSatish Balay   a->bs  = oldmat->bs;
23260ac07820SSatish Balay   a->bs2 = oldmat->bs2;
23270ac07820SSatish Balay   a->mbs = oldmat->mbs;
23280ac07820SSatish Balay   a->nbs = oldmat->nbs;
23290ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
23300ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
23310ac07820SSatish Balay 
23320ac07820SSatish Balay   a->rstart       = oldmat->rstart;
23330ac07820SSatish Balay   a->rend         = oldmat->rend;
23340ac07820SSatish Balay   a->cstart       = oldmat->cstart;
23350ac07820SSatish Balay   a->cend         = oldmat->cend;
23360ac07820SSatish Balay   a->size         = oldmat->size;
23370ac07820SSatish Balay   a->rank         = oldmat->rank;
2338aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2339aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2340aef5e8e0SSatish Balay   a->rowindices   = 0;
23410ac07820SSatish Balay   a->rowvalues    = 0;
23420ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
234330793edcSSatish Balay   a->barray       = 0;
23443123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
23453123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
23463123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
23473123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
23480ac07820SSatish Balay 
2349133cdb44SSatish Balay   /* hash table stuff */
2350133cdb44SSatish Balay   a->ht           = 0;
2351133cdb44SSatish Balay   a->hd           = 0;
2352133cdb44SSatish Balay   a->ht_size      = 0;
2353133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
235425fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2355133cdb44SSatish Balay   a->ht_total_ct  = 0;
2356133cdb44SSatish Balay   a->ht_insert_ct = 0;
2357133cdb44SSatish Balay 
2358549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
23598798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
23608798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
23610ac07820SSatish Balay   if (oldmat->colmap) {
2362aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
23630f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
236448e59246SSatish Balay #else
2365*b0a32e0cSBarry Smith     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKERRQ(ierr);
2366*b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2367549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
236848e59246SSatish Balay #endif
23690ac07820SSatish Balay   } else a->colmap = 0;
23700ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2371*b0a32e0cSBarry Smith ierr = PetscMalloc(len*sizeof(int),&(    a->garray ));CHKERRQ(ierr);
2372*b0a32e0cSBarry Smith     PetscLogObjectMemory(mat,len*sizeof(int));
2373549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
23740ac07820SSatish Balay   } else a->garray = 0;
23750ac07820SSatish Balay 
23760ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2377*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->lvec);
23780ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2379e18c124aSSatish Balay 
2380*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->Mvctx);
23812e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2382*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->A);
23832e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2384*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,a->B);
2385*b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
23860ac07820SSatish Balay   *newmat = mat;
23873a40ed3dSBarry Smith   PetscFunctionReturn(0);
23880ac07820SSatish Balay }
238957b952d6SSatish Balay 
2390e090d566SSatish Balay #include "petscsys.h"
239157b952d6SSatish Balay 
2392273d9f13SBarry Smith EXTERN_C_BEGIN
23935615d1e5SSatish Balay #undef __FUNC__
2394*b0a32e0cSBarry Smith #define __FUNC__ "MatLoad_MPIBAIJ"
2395*b0a32e0cSBarry Smith int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
239657b952d6SSatish Balay {
239757b952d6SSatish Balay   Mat          A;
239857b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
239957b952d6SSatish Balay   Scalar       *vals,*buf;
240057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
240157b952d6SSatish Balay   MPI_Status   status;
2402cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
240357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2404f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
240557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
240657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
240757b952d6SSatish Balay 
2408d64ed03dSBarry Smith   PetscFunctionBegin;
2409*b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
241057b952d6SSatish Balay 
2411d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2412d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
241357b952d6SSatish Balay   if (!rank) {
2414*b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2415e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
241629bbc08cSBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2417d64ed03dSBarry Smith     if (header[3] < 0) {
241829bbc08cSBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2419d64ed03dSBarry Smith     }
24206c5fab8fSBarry Smith   }
2421d64ed03dSBarry Smith 
2422ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
242357b952d6SSatish Balay   M = header[1]; N = header[2];
242457b952d6SSatish Balay 
242529bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
242657b952d6SSatish Balay 
242757b952d6SSatish Balay   /*
242857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
242957b952d6SSatish Balay      divisible by the blocksize
243057b952d6SSatish Balay   */
243157b952d6SSatish Balay   Mbs        = M/bs;
243257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
243357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
243457b952d6SSatish Balay   else                  Mbs++;
243557b952d6SSatish Balay   if (extra_rows &&!rank) {
2436*b0a32e0cSBarry Smith     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
243757b952d6SSatish Balay   }
2438537820f0SBarry Smith 
243957b952d6SSatish Balay   /* determine ownership of all rows */
244057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
244157b952d6SSatish Balay   m   = mbs*bs;
2442*b0a32e0cSBarry Smith ierr = PetscMalloc(2*(size+2)*sizeof(int),&  rowners );CHKERRQ(ierr);
2443cee3aa6bSSatish Balay   browners = rowners + size + 1;
2444ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
244557b952d6SSatish Balay   rowners[0] = 0;
2446cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2447cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
244857b952d6SSatish Balay   rstart = rowners[rank];
244957b952d6SSatish Balay   rend   = rowners[rank+1];
245057b952d6SSatish Balay 
245157b952d6SSatish Balay   /* distribute row lengths to all processors */
2452*b0a32e0cSBarry Smith   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKERRQ(ierr);
245357b952d6SSatish Balay   if (!rank) {
2454*b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&    rowlengths );CHKERRQ(ierr);
2455e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
245657b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2457*b0a32e0cSBarry Smith ierr = PetscMalloc(size*sizeof(int),&(    sndcounts ));CHKERRQ(ierr);
2458cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2459ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2460606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2461d64ed03dSBarry Smith   } else {
2462ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
246357b952d6SSatish Balay   }
246457b952d6SSatish Balay 
246557b952d6SSatish Balay   if (!rank) {
246657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
2467*b0a32e0cSBarry Smith ierr = PetscMalloc(size*sizeof(int),&(    procsnz ));CHKERRQ(ierr);
2468549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
246957b952d6SSatish Balay     for (i=0; i<size; i++) {
247057b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
247157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
247257b952d6SSatish Balay       }
247357b952d6SSatish Balay     }
2474606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
247557b952d6SSatish Balay 
247657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
247757b952d6SSatish Balay     maxnz = 0;
247857b952d6SSatish Balay     for (i=0; i<size; i++) {
247957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
248057b952d6SSatish Balay     }
2481*b0a32e0cSBarry Smith ierr = PetscMalloc(maxnz*sizeof(int),&(    cols ));CHKERRQ(ierr);
248257b952d6SSatish Balay 
248357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
248457b952d6SSatish Balay     nz = procsnz[0];
2485*b0a32e0cSBarry Smith ierr = PetscMalloc(nz*sizeof(int),&(    ibuf ));CHKERRQ(ierr);
248657b952d6SSatish Balay     mycols = ibuf;
2487cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2488e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2489cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2490cee3aa6bSSatish Balay 
249157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
249257b952d6SSatish Balay     for (i=1; i<size-1; i++) {
249357b952d6SSatish Balay       nz   = procsnz[i];
2494e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2495ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
249657b952d6SSatish Balay     }
249757b952d6SSatish Balay     /* read in the stuff for the last proc */
249857b952d6SSatish Balay     if (size != 1) {
249957b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2500e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
250157b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2502ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
250357b952d6SSatish Balay     }
2504606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2505d64ed03dSBarry Smith   } else {
250657b952d6SSatish Balay     /* determine buffer space needed for message */
250757b952d6SSatish Balay     nz = 0;
250857b952d6SSatish Balay     for (i=0; i<m; i++) {
250957b952d6SSatish Balay       nz += locrowlens[i];
251057b952d6SSatish Balay     }
2511*b0a32e0cSBarry Smith ierr = PetscMalloc(nz*sizeof(int),&(    ibuf   ));CHKERRQ(ierr);
251257b952d6SSatish Balay     mycols = ibuf;
251357b952d6SSatish Balay     /* receive message of column indices*/
2514ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2515ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
251629bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
251757b952d6SSatish Balay   }
251857b952d6SSatish Balay 
251957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2520*b0a32e0cSBarry Smith   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKERRQ(ierr);
2521cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
2522*b0a32e0cSBarry Smith ierr = PetscMalloc(3*Mbs*sizeof(int),&(  mask   ));CHKERRQ(ierr);
2523549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
252457b952d6SSatish Balay   masked1 = mask    + Mbs;
252557b952d6SSatish Balay   masked2 = masked1 + Mbs;
252657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
252757b952d6SSatish Balay   for (i=0; i<mbs; i++) {
252857b952d6SSatish Balay     dcount  = 0;
252957b952d6SSatish Balay     odcount = 0;
253057b952d6SSatish Balay     for (j=0; j<bs; j++) {
253157b952d6SSatish Balay       kmax = locrowlens[rowcount];
253257b952d6SSatish Balay       for (k=0; k<kmax; k++) {
253357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
253457b952d6SSatish Balay         if (!mask[tmp]) {
253557b952d6SSatish Balay           mask[tmp] = 1;
253657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
253757b952d6SSatish Balay           else masked1[dcount++] = tmp;
253857b952d6SSatish Balay         }
253957b952d6SSatish Balay       }
254057b952d6SSatish Balay       rowcount++;
254157b952d6SSatish Balay     }
2542cee3aa6bSSatish Balay 
254357b952d6SSatish Balay     dlens[i]  = dcount;
254457b952d6SSatish Balay     odlens[i] = odcount;
2545cee3aa6bSSatish Balay 
254657b952d6SSatish Balay     /* zero out the mask elements we set */
254757b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
254857b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
254957b952d6SSatish Balay   }
2550cee3aa6bSSatish Balay 
255157b952d6SSatish Balay   /* create our matrix */
255247a75d0bSBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
255357b952d6SSatish Balay   A = *newmat;
25546d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
255557b952d6SSatish Balay 
255657b952d6SSatish Balay   if (!rank) {
2557*b0a32e0cSBarry Smith ierr = PetscMalloc(maxnz*sizeof(Scalar),&(    buf ));CHKERRQ(ierr);
255857b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
255957b952d6SSatish Balay     nz = procsnz[0];
256057b952d6SSatish Balay     vals = buf;
2561cee3aa6bSSatish Balay     mycols = ibuf;
2562cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2563e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2564cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2565537820f0SBarry Smith 
256657b952d6SSatish Balay     /* insert into matrix */
256757b952d6SSatish Balay     jj      = rstart*bs;
256857b952d6SSatish Balay     for (i=0; i<m; i++) {
2569b48991e4SBarry Smith       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
257057b952d6SSatish Balay       mycols += locrowlens[i];
257157b952d6SSatish Balay       vals   += locrowlens[i];
257257b952d6SSatish Balay       jj++;
257357b952d6SSatish Balay     }
257457b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
257557b952d6SSatish Balay     for (i=1; i<size-1; i++) {
257657b952d6SSatish Balay       nz   = procsnz[i];
257757b952d6SSatish Balay       vals = buf;
2578e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2579ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
258057b952d6SSatish Balay     }
258157b952d6SSatish Balay     /* the last proc */
258257b952d6SSatish Balay     if (size != 1){
258357b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2584cee3aa6bSSatish Balay       vals = buf;
2585e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
258657b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2587ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
258857b952d6SSatish Balay     }
2589606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2590d64ed03dSBarry Smith   } else {
259157b952d6SSatish Balay     /* receive numeric values */
2592*b0a32e0cSBarry Smith ierr = PetscMalloc(nz*sizeof(Scalar),&(    buf ));CHKERRQ(ierr);
259357b952d6SSatish Balay 
259457b952d6SSatish Balay     /* receive message of values*/
259557b952d6SSatish Balay     vals   = buf;
2596cee3aa6bSSatish Balay     mycols = ibuf;
2597ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2598ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
259929bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
260057b952d6SSatish Balay 
260157b952d6SSatish Balay     /* insert into matrix */
260257b952d6SSatish Balay     jj      = rstart*bs;
2603cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2604b48991e4SBarry Smith       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
260557b952d6SSatish Balay       mycols += locrowlens[i];
260657b952d6SSatish Balay       vals   += locrowlens[i];
260757b952d6SSatish Balay       jj++;
260857b952d6SSatish Balay     }
260957b952d6SSatish Balay   }
2610606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2611606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2612606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2613606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2614606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2615606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
26166d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26176d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
26183a40ed3dSBarry Smith   PetscFunctionReturn(0);
261957b952d6SSatish Balay }
2620273d9f13SBarry Smith EXTERN_C_END
262157b952d6SSatish Balay 
2622133cdb44SSatish Balay #undef __FUNC__
2623*b0a32e0cSBarry Smith #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2624133cdb44SSatish Balay /*@
2625133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2626133cdb44SSatish Balay 
2627133cdb44SSatish Balay    Input Parameters:
2628133cdb44SSatish Balay .  mat  - the matrix
2629133cdb44SSatish Balay .  fact - factor
2630133cdb44SSatish Balay 
2631fee21e36SBarry Smith    Collective on Mat
2632fee21e36SBarry Smith 
26338c890885SBarry Smith    Level: advanced
26348c890885SBarry Smith 
2635133cdb44SSatish Balay   Notes:
2636133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2637133cdb44SSatish Balay 
2638133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2639133cdb44SSatish Balay 
2640133cdb44SSatish Balay .seealso: MatSetOption()
2641133cdb44SSatish Balay @*/
2642329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2643133cdb44SSatish Balay {
264425fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2645273d9f13SBarry Smith   int         ierr;
2646273d9f13SBarry Smith   PetscTruth  flg;
2647133cdb44SSatish Balay 
2648133cdb44SSatish Balay   PetscFunctionBegin;
2649133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2650273d9f13SBarry Smith   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
2651273d9f13SBarry Smith   if (!flg) {
265229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2653133cdb44SSatish Balay   }
2654133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2655133cdb44SSatish Balay   baij->ht_fact = fact;
2656133cdb44SSatish Balay   PetscFunctionReturn(0);
2657133cdb44SSatish Balay }
2658