xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1*be1d678aSKris Buschelman #define PETSCMAT_DLL
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
479bdfe76SSatish Balay 
5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1693fea6afSBarry Smith 
1793fea6afSBarry Smith /*  UGLY, ugly, ugly
1887828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1993fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2093fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2193fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2293fea6afSBarry Smith    into the single precision data structures.
2393fea6afSBarry Smith */
2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
3093fea6afSBarry Smith #else
316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3693fea6afSBarry Smith #endif
373b2fbd54SBarry Smith 
384a2ae208SSatish Balay #undef __FUNCT__
394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
417843d17aSBarry Smith {
427843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43dfbe8321SBarry Smith   PetscErrorCode ierr;
44b24ad042SBarry Smith   PetscInt       i;
4587828ca2SBarry Smith   PetscScalar    *va,*vb;
467843d17aSBarry Smith   Vec            vtmp;
477843d17aSBarry Smith 
487843d17aSBarry Smith   PetscFunctionBegin;
497843d17aSBarry Smith 
507843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
511ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
527843d17aSBarry Smith 
53ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
547843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
551ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
567843d17aSBarry Smith 
57273d9f13SBarry Smith   for (i=0; i<A->m; i++){
58273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
597843d17aSBarry Smith   }
607843d17aSBarry Smith 
611ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
621ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
647843d17aSBarry Smith 
657843d17aSBarry Smith   PetscFunctionReturn(0);
667843d17aSBarry Smith }
677843d17aSBarry Smith 
687fc3c18eSBarry Smith EXTERN_C_BEGIN
694a2ae208SSatish Balay #undef __FUNCT__
704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
727fc3c18eSBarry Smith {
737fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74dfbe8321SBarry Smith   PetscErrorCode ierr;
757fc3c18eSBarry Smith 
767fc3c18eSBarry Smith   PetscFunctionBegin;
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
787fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
797fc3c18eSBarry Smith   PetscFunctionReturn(0);
807fc3c18eSBarry Smith }
817fc3c18eSBarry Smith EXTERN_C_END
827fc3c18eSBarry Smith 
837fc3c18eSBarry Smith EXTERN_C_BEGIN
844a2ae208SSatish Balay #undef __FUNCT__
854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
877fc3c18eSBarry Smith {
887fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89dfbe8321SBarry Smith   PetscErrorCode ierr;
907fc3c18eSBarry Smith 
917fc3c18eSBarry Smith   PetscFunctionBegin;
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
937fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
947fc3c18eSBarry Smith   PetscFunctionReturn(0);
957fc3c18eSBarry Smith }
967fc3c18eSBarry Smith EXTERN_C_END
977fc3c18eSBarry Smith 
98537820f0SBarry Smith /*
99537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10157b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10257b952d6SSatish Balay    length of colmap equals the global matrix length.
10357b952d6SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
10757b952d6SSatish Balay {
10857b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
10957b952d6SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
1106849ba73SBarry Smith   PetscErrorCode ierr;
111521d7252SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->bs;
11257b952d6SSatish Balay 
113d64ed03dSBarry Smith   PetscFunctionBegin;
114aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
115f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11648e59246SSatish Balay   for (i=0; i<nbs; i++){
1170f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11848e59246SSatish Balay   }
11948e59246SSatish Balay #else
120b24ad042SBarry Smith   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
12152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12448e59246SSatish Balay #endif
1253a40ed3dSBarry Smith   PetscFunctionReturn(0);
12657b952d6SSatish Balay }
12757b952d6SSatish Balay 
12880c1aa95SSatish Balay #define CHUNKSIZE  10
12980c1aa95SSatish Balay 
130f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13180c1aa95SSatish Balay { \
13280c1aa95SSatish Balay  \
13380c1aa95SSatish Balay     brow = row/bs;  \
13480c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13680c1aa95SSatish Balay       bcol = col/bs; \
13780c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
138ab26458aSBarry Smith       low = 0; high = nrow; \
139ab26458aSBarry Smith       while (high-low > 3) { \
140ab26458aSBarry Smith         t = (low+high)/2; \
141ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
142ab26458aSBarry Smith         else              low  = t; \
143ab26458aSBarry Smith       } \
144ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14580c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14680c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14780c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
149eada6651SSatish Balay           else                    *bap  = value;  \
150ac7a638eSSatish Balay           goto a_noinsert; \
15180c1aa95SSatish Balay         } \
15280c1aa95SSatish Balay       } \
15389280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
15477431f27SBarry Smith       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
15580c1aa95SSatish Balay       if (nrow >= rmax) { \
15680c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
157b24ad042SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1583eda8832SBarry Smith         MatScalar *new_a; \
15980c1aa95SSatish Balay  \
16077431f27SBarry Smith         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
16189280ab3SLois Curfman McInnes  \
16280c1aa95SSatish Balay         /* malloc new storage space */ \
163b24ad042SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
16482502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
165b24ad042SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
16680c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16780c1aa95SSatish Balay  \
16880c1aa95SSatish Balay         /* copy over old data into new slots */ \
16980c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
17080c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
171b24ad042SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
17280c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
173b24ad042SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
1743eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17587828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
176549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1773eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17880c1aa95SSatish Balay         /* free up old matrix storage */ \
179606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
180606d414cSSatish Balay         if (!a->singlemalloc) { \
181606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
182606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
183606d414cSSatish Balay         } \
18480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1857c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18680c1aa95SSatish Balay  \
18780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
188ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
18952e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
19080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19180c1aa95SSatish Balay         a->reallocs++; \
19280c1aa95SSatish Balay         a->nz++; \
19380c1aa95SSatish Balay       } \
19480c1aa95SSatish Balay       N = nrow++ - 1;  \
19580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19680c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1983eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19980c1aa95SSatish Balay       } \
2003eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
203ac7a638eSSatish Balay       a_noinsert:; \
20480c1aa95SSatish Balay     ailen[brow] = nrow; \
20580c1aa95SSatish Balay }
20657b952d6SSatish Balay 
207ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
208ac7a638eSSatish Balay { \
209ac7a638eSSatish Balay     brow = row/bs;  \
210ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
212ac7a638eSSatish Balay       bcol = col/bs; \
213ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
214ac7a638eSSatish Balay       low = 0; high = nrow; \
215ac7a638eSSatish Balay       while (high-low > 3) { \
216ac7a638eSSatish Balay         t = (low+high)/2; \
217ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
218ac7a638eSSatish Balay         else              low  = t; \
219ac7a638eSSatish Balay       } \
220ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
221ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
222ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
223ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
224ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
225ac7a638eSSatish Balay           else                    *bap  = value;  \
226ac7a638eSSatish Balay           goto b_noinsert; \
227ac7a638eSSatish Balay         } \
228ac7a638eSSatish Balay       } \
22989280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
23077431f27SBarry Smith       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
231ac7a638eSSatish Balay       if (nrow >= rmax) { \
232ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
233b24ad042SBarry Smith         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2343eda8832SBarry Smith         MatScalar *new_a; \
235ac7a638eSSatish Balay  \
23677431f27SBarry Smith         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
23789280ab3SLois Curfman McInnes  \
238ac7a638eSSatish Balay         /* malloc new storage space */ \
239b24ad042SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
24082502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
241b24ad042SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
242ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
243ac7a638eSSatish Balay  \
244ac7a638eSSatish Balay         /* copy over old data into new slots */ \
245ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24674c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
247b24ad042SBarry Smith         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
248ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
249b24ad042SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2513eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
252549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2533eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
254ac7a638eSSatish Balay         /* free up old matrix storage */ \
255606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
256606d414cSSatish Balay         if (!b->singlemalloc) { \
257606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
258606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
259606d414cSSatish Balay         } \
26074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2617c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
262ac7a638eSSatish Balay  \
263ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
264ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
26552e6d16bSBarry Smith         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
26674c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26774c639caSSatish Balay         b->reallocs++; \
26874c639caSSatish Balay         b->nz++; \
269ac7a638eSSatish Balay       } \
270ac7a638eSSatish Balay       N = nrow++ - 1;  \
271ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
272ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
273ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2743eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
275ac7a638eSSatish Balay       } \
2763eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
277ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
278ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
279ac7a638eSSatish Balay       b_noinsert:; \
280ac7a638eSSatish Balay     bilen[brow] = nrow; \
281ac7a638eSSatish Balay }
282ac7a638eSSatish Balay 
28393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2844a2ae208SSatish Balay #undef __FUNCT__
2854a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
286b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
28757b952d6SSatish Balay {
2886fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
289dfbe8321SBarry Smith   PetscErrorCode ierr;
290b24ad042SBarry Smith   PetscInt       i,N = m*n;
2916fa18ffdSBarry Smith   MatScalar      *vsingle;
29293fea6afSBarry Smith 
29393fea6afSBarry Smith   PetscFunctionBegin;
2946fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2956fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29682502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2976fa18ffdSBarry Smith     b->setvalueslen  = N;
2986fa18ffdSBarry Smith   }
2996fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3006fa18ffdSBarry Smith 
30193fea6afSBarry Smith   for (i=0; i<N; i++) {
30293fea6afSBarry Smith     vsingle[i] = v[i];
30393fea6afSBarry Smith   }
30493fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30593fea6afSBarry Smith   PetscFunctionReturn(0);
30693fea6afSBarry Smith }
30793fea6afSBarry Smith 
3084a2ae208SSatish Balay #undef __FUNCT__
3094a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
310b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
31193fea6afSBarry Smith {
3126fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
313dfbe8321SBarry Smith   PetscErrorCode ierr;
314b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
3156fa18ffdSBarry Smith   MatScalar      *vsingle;
31693fea6afSBarry Smith 
31793fea6afSBarry Smith   PetscFunctionBegin;
3186fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3196fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
32082502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3216fa18ffdSBarry Smith     b->setvalueslen  = N;
3226fa18ffdSBarry Smith   }
3236fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32493fea6afSBarry Smith   for (i=0; i<N; i++) {
32593fea6afSBarry Smith     vsingle[i] = v[i];
32693fea6afSBarry Smith   }
32793fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32893fea6afSBarry Smith   PetscFunctionReturn(0);
32993fea6afSBarry Smith }
3306fa18ffdSBarry Smith 
3314a2ae208SSatish Balay #undef __FUNCT__
3324a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
333b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3346fa18ffdSBarry Smith {
3356fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
336dfbe8321SBarry Smith   PetscErrorCode ierr;
337b24ad042SBarry Smith   PetscInt       i,N = m*n;
3386fa18ffdSBarry Smith   MatScalar      *vsingle;
3396fa18ffdSBarry Smith 
3406fa18ffdSBarry Smith   PetscFunctionBegin;
3416fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3426fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
34382502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3446fa18ffdSBarry Smith     b->setvalueslen  = N;
3456fa18ffdSBarry Smith   }
3466fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3476fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3486fa18ffdSBarry Smith     vsingle[i] = v[i];
3496fa18ffdSBarry Smith   }
3506fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3516fa18ffdSBarry Smith   PetscFunctionReturn(0);
3526fa18ffdSBarry Smith }
3536fa18ffdSBarry Smith 
3544a2ae208SSatish Balay #undef __FUNCT__
3554a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
356b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3576fa18ffdSBarry Smith {
3586fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
359dfbe8321SBarry Smith   PetscErrorCode ierr;
360b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
3616fa18ffdSBarry Smith   MatScalar      *vsingle;
3626fa18ffdSBarry Smith 
3636fa18ffdSBarry Smith   PetscFunctionBegin;
3646fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3656fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
36682502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3676fa18ffdSBarry Smith     b->setvalueslen  = N;
3686fa18ffdSBarry Smith   }
3696fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3706fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3716fa18ffdSBarry Smith     vsingle[i] = v[i];
3726fa18ffdSBarry Smith   }
3736fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3746fa18ffdSBarry Smith   PetscFunctionReturn(0);
3756fa18ffdSBarry Smith }
37693fea6afSBarry Smith #endif
37793fea6afSBarry Smith 
3784a2ae208SSatish Balay #undef __FUNCT__
379e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
380b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
38193fea6afSBarry Smith {
38257b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
38393fea6afSBarry Smith   MatScalar      value;
384273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
385dfbe8321SBarry Smith   PetscErrorCode ierr;
386b24ad042SBarry Smith   PetscInt       i,j,row,col;
387b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
388b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
389521d7252SBarry Smith   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
39057b952d6SSatish Balay 
391eada6651SSatish Balay   /* Some Variables required in the macro */
39280c1aa95SSatish Balay   Mat            A = baij->A;
39380c1aa95SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
394b24ad042SBarry Smith   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3953eda8832SBarry Smith   MatScalar      *aa=a->a;
396ac7a638eSSatish Balay 
397ac7a638eSSatish Balay   Mat            B = baij->B;
398ac7a638eSSatish Balay   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
399b24ad042SBarry Smith   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
4003eda8832SBarry Smith   MatScalar      *ba=b->a;
401ac7a638eSSatish Balay 
402b24ad042SBarry Smith   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
403b24ad042SBarry Smith   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
4043eda8832SBarry Smith   MatScalar      *ap,*bap;
40580c1aa95SSatish Balay 
406d64ed03dSBarry Smith   PetscFunctionBegin;
40757b952d6SSatish Balay   for (i=0; i<m; i++) {
4085ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4092515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
41077431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
411639f9d9dSBarry Smith #endif
41257b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
41357b952d6SSatish Balay       row = im[i] - rstart_orig;
41457b952d6SSatish Balay       for (j=0; j<n; j++) {
41557b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
41657b952d6SSatish Balay           col = in[j] - cstart_orig;
41757b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
418f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41980c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
42073959e64SBarry Smith         } else if (in[j] < 0) continue;
4212515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
42277431f27SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
423639f9d9dSBarry Smith #endif
42457b952d6SSatish Balay         else {
42557b952d6SSatish Balay           if (mat->was_assembled) {
426905e6a2fSBarry Smith             if (!baij->colmap) {
427905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
428905e6a2fSBarry Smith             }
429aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4300f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
431bba1ac68SSatish Balay             col  = col - 1;
43248e59246SSatish Balay #else
433bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
43448e59246SSatish Balay #endif
43557b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43657b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4378295de27SSatish Balay               col =  in[j];
4389bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4399bf004c3SSatish Balay               B = baij->B;
4409bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4419bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4429bf004c3SSatish Balay               ba=b->a;
443bba1ac68SSatish Balay             } else col += in[j]%bs;
4448295de27SSatish Balay           } else col = in[j];
44557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
44690da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
44790da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44857b952d6SSatish Balay         }
44957b952d6SSatish Balay       }
450d64ed03dSBarry Smith     } else {
45190f02eecSBarry Smith       if (!baij->donotstash) {
452ff2fd236SBarry Smith         if (roworiented) {
4536fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
454ff2fd236SBarry Smith         } else {
4556fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
45657b952d6SSatish Balay         }
45757b952d6SSatish Balay       }
45857b952d6SSatish Balay     }
45990f02eecSBarry Smith   }
4603a40ed3dSBarry Smith   PetscFunctionReturn(0);
46157b952d6SSatish Balay }
46257b952d6SSatish Balay 
4634a2ae208SSatish Balay #undef __FUNCT__
4644a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
465b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
466ab26458aSBarry Smith {
467ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
468f15d580aSBarry Smith   const MatScalar *value;
469f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
470273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
471dfbe8321SBarry Smith   PetscErrorCode  ierr;
472b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
473b24ad042SBarry Smith   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
474521d7252SBarry Smith   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
475ab26458aSBarry Smith 
476b16ae2b1SBarry Smith   PetscFunctionBegin;
47730793edcSSatish Balay   if(!barray) {
47882502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47982502324SSatish Balay     baij->barray = barray;
48030793edcSSatish Balay   }
48130793edcSSatish Balay 
482ab26458aSBarry Smith   if (roworiented) {
483ab26458aSBarry Smith     stepval = (n-1)*bs;
484ab26458aSBarry Smith   } else {
485ab26458aSBarry Smith     stepval = (m-1)*bs;
486ab26458aSBarry Smith   }
487ab26458aSBarry Smith   for (i=0; i<m; i++) {
4885ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4892515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
49077431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
491ab26458aSBarry Smith #endif
492ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
493ab26458aSBarry Smith       row = im[i] - rstart;
494ab26458aSBarry Smith       for (j=0; j<n; j++) {
49515b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
49615b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
497f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
49815b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
499f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
50015b57d14SSatish Balay         } else { /* Here a copy is required */
501ab26458aSBarry Smith           if (roworiented) {
502ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
503ab26458aSBarry Smith           } else {
504ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
505abef11f7SSatish Balay           }
50647513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
50747513183SBarry Smith             for (jj=0; jj<bs; jj++) {
50830793edcSSatish Balay               *barray++  = *value++;
50947513183SBarry Smith             }
51047513183SBarry Smith           }
51130793edcSSatish Balay           barray -=bs2;
51215b57d14SSatish Balay         }
513abef11f7SSatish Balay 
514abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
515abef11f7SSatish Balay           col  = in[j] - cstart;
51693fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
517ab26458aSBarry Smith         }
5185ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
5192515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
52077431f27SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
521ab26458aSBarry Smith #endif
522ab26458aSBarry Smith         else {
523ab26458aSBarry Smith           if (mat->was_assembled) {
524ab26458aSBarry Smith             if (!baij->colmap) {
525ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
526ab26458aSBarry Smith             }
527a5eb4965SSatish Balay 
5282515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
529aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
530b24ad042SBarry Smith             { PetscInt data;
5310f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
53229bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
533fa46199cSSatish Balay             }
53448e59246SSatish Balay #else
53529bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
536a5eb4965SSatish Balay #endif
53748e59246SSatish Balay #endif
538aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5390f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
540fa46199cSSatish Balay             col  = (col - 1)/bs;
54148e59246SSatish Balay #else
542a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
54348e59246SSatish Balay #endif
544ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
545ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
546ab26458aSBarry Smith               col =  in[j];
547ab26458aSBarry Smith             }
548ab26458aSBarry Smith           }
549ab26458aSBarry Smith           else col = in[j];
55093fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
551ab26458aSBarry Smith         }
552ab26458aSBarry Smith       }
553d64ed03dSBarry Smith     } else {
554ab26458aSBarry Smith       if (!baij->donotstash) {
555ff2fd236SBarry Smith         if (roworiented) {
5566fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557ff2fd236SBarry Smith         } else {
5586fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
559ff2fd236SBarry Smith         }
560abef11f7SSatish Balay       }
561ab26458aSBarry Smith     }
562ab26458aSBarry Smith   }
5633a40ed3dSBarry Smith   PetscFunctionReturn(0);
564ab26458aSBarry Smith }
5656fa18ffdSBarry Smith 
5660bdbc534SSatish Balay #define HASH_KEY 0.6180339887
567b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
568b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
569b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
5704a2ae208SSatish Balay #undef __FUNCT__
5714a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
572b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
5730bdbc534SSatish Balay {
5740bdbc534SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
575273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
576dfbe8321SBarry Smith   PetscErrorCode ierr;
577b24ad042SBarry Smith   PetscInt       i,j,row,col;
578b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
579b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
580521d7252SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
581329f5518SBarry Smith   PetscReal      tmp;
5823eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
5832515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
584b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5854a15367fSSatish Balay #endif
5860bdbc534SSatish Balay 
5870bdbc534SSatish Balay   PetscFunctionBegin;
5880bdbc534SSatish Balay 
5890bdbc534SSatish Balay   for (i=0; i<m; i++) {
5902515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
59129bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
59277431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
5930bdbc534SSatish Balay #endif
5940bdbc534SSatish Balay       row = im[i];
595c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5960bdbc534SSatish Balay       for (j=0; j<n; j++) {
5970bdbc534SSatish Balay         col = in[j];
5986fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
599b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
600c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
601c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6020bdbc534SSatish Balay 
603c2760754SSatish Balay 
604c2760754SSatish Balay         idx = h1;
6052515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
606187ce0cbSSatish Balay         insert_ct++;
607187ce0cbSSatish Balay         total_ct++;
608187ce0cbSSatish Balay         if (HT[idx] != key) {
609187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
610187ce0cbSSatish Balay           if (idx == size) {
611187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
612187ce0cbSSatish Balay             if (idx == h1) {
61377431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
614187ce0cbSSatish Balay             }
615187ce0cbSSatish Balay           }
616187ce0cbSSatish Balay         }
617187ce0cbSSatish Balay #else
618c2760754SSatish Balay         if (HT[idx] != key) {
619c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
620c2760754SSatish Balay           if (idx == size) {
621c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
622c2760754SSatish Balay             if (idx == h1) {
62377431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624c2760754SSatish Balay             }
625c2760754SSatish Balay           }
626c2760754SSatish Balay         }
627187ce0cbSSatish Balay #endif
628c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
629c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
630c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6310bdbc534SSatish Balay       }
6320bdbc534SSatish Balay     } else {
6330bdbc534SSatish Balay       if (!baij->donotstash) {
634ff2fd236SBarry Smith         if (roworiented) {
6358798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
636ff2fd236SBarry Smith         } else {
6378798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6380bdbc534SSatish Balay         }
6390bdbc534SSatish Balay       }
6400bdbc534SSatish Balay     }
6410bdbc534SSatish Balay   }
6422515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
643187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
644187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
645187ce0cbSSatish Balay #endif
6460bdbc534SSatish Balay   PetscFunctionReturn(0);
6470bdbc534SSatish Balay }
6480bdbc534SSatish Balay 
6494a2ae208SSatish Balay #undef __FUNCT__
6504a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
651b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
6520bdbc534SSatish Balay {
6530bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
654273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
655dfbe8321SBarry Smith   PetscErrorCode  ierr;
656b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col;
657b24ad042SBarry Smith   PetscInt        rstart=baij->rstart ;
658521d7252SBarry Smith   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
659b24ad042SBarry Smith   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
660329f5518SBarry Smith   PetscReal       tmp;
6613eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
662f15d580aSBarry Smith   const MatScalar *v_t,*value;
6632515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
664b24ad042SBarry Smith   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6654a15367fSSatish Balay #endif
6660bdbc534SSatish Balay 
667d0a41580SSatish Balay   PetscFunctionBegin;
668d0a41580SSatish Balay 
6690bdbc534SSatish Balay   if (roworiented) {
6700bdbc534SSatish Balay     stepval = (n-1)*bs;
6710bdbc534SSatish Balay   } else {
6720bdbc534SSatish Balay     stepval = (m-1)*bs;
6730bdbc534SSatish Balay   }
6740bdbc534SSatish Balay   for (i=0; i<m; i++) {
6752515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
67677431f27SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
67777431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
6780bdbc534SSatish Balay #endif
6790bdbc534SSatish Balay     row   = im[i];
680187ce0cbSSatish Balay     v_t   = v + i*bs2;
681c2760754SSatish Balay     if (row >= rstart && row < rend) {
6820bdbc534SSatish Balay       for (j=0; j<n; j++) {
6830bdbc534SSatish Balay         col = in[j];
6840bdbc534SSatish Balay 
6850bdbc534SSatish Balay         /* Look up into the Hash Table */
686c2760754SSatish Balay         key = row*Nbs+col+1;
687c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6880bdbc534SSatish Balay 
689c2760754SSatish Balay         idx = h1;
6902515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
691187ce0cbSSatish Balay         total_ct++;
692187ce0cbSSatish Balay         insert_ct++;
693187ce0cbSSatish Balay        if (HT[idx] != key) {
694187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
695187ce0cbSSatish Balay           if (idx == size) {
696187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
697187ce0cbSSatish Balay             if (idx == h1) {
69877431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
699187ce0cbSSatish Balay             }
700187ce0cbSSatish Balay           }
701187ce0cbSSatish Balay         }
702187ce0cbSSatish Balay #else
703c2760754SSatish Balay         if (HT[idx] != key) {
704c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
705c2760754SSatish Balay           if (idx == size) {
706c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
707c2760754SSatish Balay             if (idx == h1) {
70877431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
709c2760754SSatish Balay             }
710c2760754SSatish Balay           }
711c2760754SSatish Balay         }
712187ce0cbSSatish Balay #endif
713c2760754SSatish Balay         baij_a = HD[idx];
7140bdbc534SSatish Balay         if (roworiented) {
715c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
716187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
717187ce0cbSSatish Balay           value = v_t;
718187ce0cbSSatish Balay           v_t  += bs;
719fef45726SSatish Balay           if (addv == ADD_VALUES) {
720c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
721c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
722fef45726SSatish Balay                 baij_a[jj]  += *value++;
723b4cc0f5aSSatish Balay               }
724b4cc0f5aSSatish Balay             }
725fef45726SSatish Balay           } else {
726c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
727c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
728fef45726SSatish Balay                 baij_a[jj]  = *value++;
729fef45726SSatish Balay               }
730fef45726SSatish Balay             }
731fef45726SSatish Balay           }
7320bdbc534SSatish Balay         } else {
7330bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
734fef45726SSatish Balay           if (addv == ADD_VALUES) {
735b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7360bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
737fef45726SSatish Balay                 baij_a[jj]  += *value++;
738fef45726SSatish Balay               }
739fef45726SSatish Balay             }
740fef45726SSatish Balay           } else {
741fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
742fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
743fef45726SSatish Balay                 baij_a[jj]  = *value++;
744fef45726SSatish Balay               }
745b4cc0f5aSSatish Balay             }
7460bdbc534SSatish Balay           }
7470bdbc534SSatish Balay         }
7480bdbc534SSatish Balay       }
7490bdbc534SSatish Balay     } else {
7500bdbc534SSatish Balay       if (!baij->donotstash) {
7510bdbc534SSatish Balay         if (roworiented) {
7528798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7530bdbc534SSatish Balay         } else {
7548798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7550bdbc534SSatish Balay         }
7560bdbc534SSatish Balay       }
7570bdbc534SSatish Balay     }
7580bdbc534SSatish Balay   }
7592515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
760187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
761187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
762187ce0cbSSatish Balay #endif
7630bdbc534SSatish Balay   PetscFunctionReturn(0);
7640bdbc534SSatish Balay }
765133cdb44SSatish Balay 
7664a2ae208SSatish Balay #undef __FUNCT__
7674a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
768b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
769d6de1c52SSatish Balay {
770d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
7716849ba73SBarry Smith   PetscErrorCode ierr;
772521d7252SBarry Smith   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
773b24ad042SBarry Smith   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
774d6de1c52SSatish Balay 
775133cdb44SSatish Balay   PetscFunctionBegin;
776d6de1c52SSatish Balay   for (i=0; i<m; i++) {
77777431f27SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
77877431f27SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
779d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
780d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
781d6de1c52SSatish Balay       for (j=0; j<n; j++) {
78277431f27SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
78377431f27SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
784d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
785d6de1c52SSatish Balay           col = idxn[j] - bscstart;
78698dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787d64ed03dSBarry Smith         } else {
788905e6a2fSBarry Smith           if (!baij->colmap) {
789905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
790905e6a2fSBarry Smith           }
791aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7920f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
793fa46199cSSatish Balay           data --;
79448e59246SSatish Balay #else
79548e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
79648e59246SSatish Balay #endif
79748e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
798d9d09a02SSatish Balay           else {
79948e59246SSatish Balay             col  = data + idxn[j]%bs;
80098dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
801d6de1c52SSatish Balay           }
802d6de1c52SSatish Balay         }
803d6de1c52SSatish Balay       }
804d64ed03dSBarry Smith     } else {
80529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
806d6de1c52SSatish Balay     }
807d6de1c52SSatish Balay   }
8083a40ed3dSBarry Smith  PetscFunctionReturn(0);
809d6de1c52SSatish Balay }
810d6de1c52SSatish Balay 
8114a2ae208SSatish Balay #undef __FUNCT__
8124a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
813dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
814d6de1c52SSatish Balay {
815d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
816d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
817dfbe8321SBarry Smith   PetscErrorCode ierr;
8188a62d963SHong Zhang   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
819329f5518SBarry Smith   PetscReal      sum = 0.0;
8203eda8832SBarry Smith   MatScalar      *v;
821d6de1c52SSatish Balay 
822d64ed03dSBarry Smith   PetscFunctionBegin;
823d6de1c52SSatish Balay   if (baij->size == 1) {
824064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
825d6de1c52SSatish Balay   } else {
826d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
827d6de1c52SSatish Balay       v = amat->a;
8288a62d963SHong Zhang       nz = amat->nz*bs2;
8298a62d963SHong Zhang       for (i=0; i<nz; i++) {
830aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
831329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
832d6de1c52SSatish Balay #else
833d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
834d6de1c52SSatish Balay #endif
835d6de1c52SSatish Balay       }
836d6de1c52SSatish Balay       v = bmat->a;
8378a62d963SHong Zhang       nz = bmat->nz*bs2;
8388a62d963SHong Zhang       for (i=0; i<nz; i++) {
839aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
840329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
841d6de1c52SSatish Balay #else
842d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
843d6de1c52SSatish Balay #endif
844d6de1c52SSatish Balay       }
845064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
846064f8208SBarry Smith       *nrm = sqrt(*nrm);
8478a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
8488a62d963SHong Zhang       PetscReal *tmp,*tmp2;
8498a62d963SHong Zhang       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
8508a62d963SHong Zhang       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
8518a62d963SHong Zhang       tmp2 = tmp + mat->N;
8528a62d963SHong Zhang       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
8538a62d963SHong Zhang       v = amat->a; jj = amat->j;
8548a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
8558a62d963SHong Zhang         for (j=0; j<bs; j++){
8568a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
8578a62d963SHong Zhang           for (row=0; row<bs; row++){
8588a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
8598a62d963SHong Zhang           }
8608a62d963SHong Zhang         }
8618a62d963SHong Zhang         jj++;
8628a62d963SHong Zhang       }
8638a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
8648a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
8658a62d963SHong Zhang         for (j=0; j<bs; j++){
8668a62d963SHong Zhang           col = bs*garray[*jj] + j;
8678a62d963SHong Zhang           for (row=0; row<bs; row++){
8688a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
8698a62d963SHong Zhang           }
8708a62d963SHong Zhang         }
8718a62d963SHong Zhang         jj++;
8728a62d963SHong Zhang       }
8738a62d963SHong Zhang       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
8748a62d963SHong Zhang       *nrm = 0.0;
8758a62d963SHong Zhang       for (j=0; j<mat->N; j++) {
8768a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8778a62d963SHong Zhang       }
8788a62d963SHong Zhang       ierr = PetscFree(tmp);CHKERRQ(ierr);
8798a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
880577dd1f9SKris Buschelman       PetscReal *sums;
881577dd1f9SKris Buschelman       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
8828a62d963SHong Zhang       sum = 0.0;
8838a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
8848a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
8858a62d963SHong Zhang         v = amat->a + bs2*amat->i[j];
8868a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
8878a62d963SHong Zhang         for (i=0; i<nz; i++) {
8888a62d963SHong Zhang           for (col=0; col<bs; col++){
8898a62d963SHong Zhang             for (row=0; row<bs; row++){
8908a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8918a62d963SHong Zhang             }
8928a62d963SHong Zhang           }
8938a62d963SHong Zhang         }
8948a62d963SHong Zhang         v = bmat->a + bs2*bmat->i[j];
8958a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
8968a62d963SHong Zhang         for (i=0; i<nz; i++) {
8978a62d963SHong Zhang           for (col=0; col<bs; col++){
8988a62d963SHong Zhang             for (row=0; row<bs; row++){
8998a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
9008a62d963SHong Zhang             }
9018a62d963SHong Zhang           }
9028a62d963SHong Zhang         }
9038a62d963SHong Zhang         for (row=0; row<bs; row++){
9048a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
9058a62d963SHong Zhang         }
9068a62d963SHong Zhang       }
9078a62d963SHong Zhang       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
908577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
909d64ed03dSBarry Smith     } else {
91029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
911d6de1c52SSatish Balay     }
912d64ed03dSBarry Smith   }
9133a40ed3dSBarry Smith   PetscFunctionReturn(0);
914d6de1c52SSatish Balay }
91557b952d6SSatish Balay 
916fef45726SSatish Balay /*
917fef45726SSatish Balay   Creates the hash table, and sets the table
918fef45726SSatish Balay   This table is created only once.
919fef45726SSatish Balay   If new entried need to be added to the matrix
920fef45726SSatish Balay   then the hash table has to be destroyed and
921fef45726SSatish Balay   recreated.
922fef45726SSatish Balay */
9234a2ae208SSatish Balay #undef __FUNCT__
9244a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
925dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
926596b8d2eSBarry Smith {
927596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
928596b8d2eSBarry Smith   Mat            A = baij->A,B=baij->B;
929596b8d2eSBarry Smith   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
930b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
9316849ba73SBarry Smith   PetscErrorCode ierr;
932b24ad042SBarry Smith   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
933b24ad042SBarry Smith   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
934b24ad042SBarry Smith   PetscInt       *HT,key;
9353eda8832SBarry Smith   MatScalar      **HD;
936329f5518SBarry Smith   PetscReal      tmp;
9372515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
938b24ad042SBarry Smith   PetscInt       ct=0,max=0;
9394a15367fSSatish Balay #endif
940fef45726SSatish Balay 
941d64ed03dSBarry Smith   PetscFunctionBegin;
942b24ad042SBarry Smith   baij->ht_size=(PetscInt)(factor*nz);
9430bdbc534SSatish Balay   size = baij->ht_size;
944fef45726SSatish Balay 
9450bdbc534SSatish Balay   if (baij->ht) {
9460bdbc534SSatish Balay     PetscFunctionReturn(0);
947596b8d2eSBarry Smith   }
9480bdbc534SSatish Balay 
949fef45726SSatish Balay   /* Allocate Memory for Hash Table */
950b24ad042SBarry Smith   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
951b24ad042SBarry Smith   baij->ht = (PetscInt*)(baij->hd + size);
952b9e4cc15SSatish Balay   HD       = baij->hd;
953a07cd24cSSatish Balay   HT       = baij->ht;
954b9e4cc15SSatish Balay 
955b9e4cc15SSatish Balay 
956b24ad042SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
9570bdbc534SSatish Balay 
958596b8d2eSBarry Smith 
959596b8d2eSBarry Smith   /* Loop Over A */
9600bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
961596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
9620bdbc534SSatish Balay       row = i+rstart;
9630bdbc534SSatish Balay       col = aj[j]+cstart;
964596b8d2eSBarry Smith 
965187ce0cbSSatish Balay       key = row*Nbs + col + 1;
966c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9670bdbc534SSatish Balay       for (k=0; k<size; k++){
968958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9690bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9700bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
971596b8d2eSBarry Smith           break;
9722515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
973187ce0cbSSatish Balay         } else {
974187ce0cbSSatish Balay           ct++;
975187ce0cbSSatish Balay #endif
976596b8d2eSBarry Smith         }
977187ce0cbSSatish Balay       }
9782515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
979187ce0cbSSatish Balay       if (k> max) max = k;
980187ce0cbSSatish Balay #endif
981596b8d2eSBarry Smith     }
982596b8d2eSBarry Smith   }
983596b8d2eSBarry Smith   /* Loop Over B */
9840bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
985596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9860bdbc534SSatish Balay       row = i+rstart;
9870bdbc534SSatish Balay       col = garray[bj[j]];
988187ce0cbSSatish Balay       key = row*Nbs + col + 1;
989c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9900bdbc534SSatish Balay       for (k=0; k<size; k++){
991958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9920bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9930bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
994596b8d2eSBarry Smith           break;
9952515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
996187ce0cbSSatish Balay         } else {
997187ce0cbSSatish Balay           ct++;
998187ce0cbSSatish Balay #endif
999596b8d2eSBarry Smith         }
1000187ce0cbSSatish Balay       }
10012515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1002187ce0cbSSatish Balay       if (k> max) max = k;
1003187ce0cbSSatish Balay #endif
1004596b8d2eSBarry Smith     }
1005596b8d2eSBarry Smith   }
1006596b8d2eSBarry Smith 
1007596b8d2eSBarry Smith   /* Print Summary */
10082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1009c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
1010596b8d2eSBarry Smith     if (HT[i]) {j++;}
1011c38d4ed2SBarry Smith   }
101263ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
1013187ce0cbSSatish Balay #endif
10143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1015596b8d2eSBarry Smith }
101657b952d6SSatish Balay 
10174a2ae208SSatish Balay #undef __FUNCT__
10184a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
1019dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1020bbb85fb3SSatish Balay {
1021bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1022dfbe8321SBarry Smith   PetscErrorCode ierr;
1023b24ad042SBarry Smith   PetscInt       nstash,reallocs;
1024bbb85fb3SSatish Balay   InsertMode     addv;
1025bbb85fb3SSatish Balay 
1026bbb85fb3SSatish Balay   PetscFunctionBegin;
1027bbb85fb3SSatish Balay   if (baij->donotstash) {
1028bbb85fb3SSatish Balay     PetscFunctionReturn(0);
1029bbb85fb3SSatish Balay   }
1030bbb85fb3SSatish Balay 
1031bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
1032bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1033bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
103429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1035bbb85fb3SSatish Balay   }
1036bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
1037bbb85fb3SSatish Balay 
10388798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
10398798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
10408798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
104163ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
104246680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
104363ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1044bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1045bbb85fb3SSatish Balay }
1046bbb85fb3SSatish Balay 
10474a2ae208SSatish Balay #undef __FUNCT__
10484a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
1049dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1050bbb85fb3SSatish Balay {
1051bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
1052a2d1c673SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
10536849ba73SBarry Smith   PetscErrorCode ierr;
1054b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
1055b24ad042SBarry Smith   PetscInt       *row,*col,other_disassembled;
10567c922b88SBarry Smith   PetscTruth     r1,r2,r3;
10573eda8832SBarry Smith   MatScalar      *val;
1058bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
1059b24ad042SBarry Smith   PetscMPIInt    n;
1060bbb85fb3SSatish Balay 
1061bbb85fb3SSatish Balay   PetscFunctionBegin;
1062bbb85fb3SSatish Balay   if (!baij->donotstash) {
1063a2d1c673SSatish Balay     while (1) {
10648798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1065a2d1c673SSatish Balay       if (!flg) break;
1066a2d1c673SSatish Balay 
1067bbb85fb3SSatish Balay       for (i=0; i<n;) {
1068bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1069bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1070bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
1071bbb85fb3SSatish Balay         else       ncols = n-i;
1072bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
107393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1074bbb85fb3SSatish Balay         i = j;
1075bbb85fb3SSatish Balay       }
1076bbb85fb3SSatish Balay     }
10778798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1078a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1079a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1080a2d1c673SSatish Balay        restore the original flags */
1081a2d1c673SSatish Balay     r1 = baij->roworiented;
1082a2d1c673SSatish Balay     r2 = a->roworiented;
1083a2d1c673SSatish Balay     r3 = b->roworiented;
10847c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10857c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10867c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1087a2d1c673SSatish Balay     while (1) {
10888798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1089a2d1c673SSatish Balay       if (!flg) break;
1090a2d1c673SSatish Balay 
1091a2d1c673SSatish Balay       for (i=0; i<n;) {
1092a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1093a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1094a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1095a2d1c673SSatish Balay         else       ncols = n-i;
109693fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1097a2d1c673SSatish Balay         i = j;
1098a2d1c673SSatish Balay       }
1099a2d1c673SSatish Balay     }
11008798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1101a2d1c673SSatish Balay     baij->roworiented = r1;
1102a2d1c673SSatish Balay     a->roworiented    = r2;
1103a2d1c673SSatish Balay     b->roworiented    = r3;
1104bbb85fb3SSatish Balay   }
1105bbb85fb3SSatish Balay 
1106bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1107bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1108bbb85fb3SSatish Balay 
1109bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1110bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1111bbb85fb3SSatish Balay   /*
1112bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1113bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1114bbb85fb3SSatish Balay   */
1115bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1116bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1117bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1118bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1119bbb85fb3SSatish Balay     }
1120bbb85fb3SSatish Balay   }
1121bbb85fb3SSatish Balay 
1122bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1123bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1124bbb85fb3SSatish Balay   }
112573e7a558SHong Zhang   b->compressedrow.use = PETSC_TRUE;
1126bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1127bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1128bbb85fb3SSatish Balay 
11292515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1130bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
113163ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1132bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1133bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1134bbb85fb3SSatish Balay   }
1135bbb85fb3SSatish Balay #endif
1136bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1137bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1138bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1139bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1140bbb85fb3SSatish Balay   }
1141bbb85fb3SSatish Balay 
1142606d414cSSatish Balay   if (baij->rowvalues) {
1143606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1144606d414cSSatish Balay     baij->rowvalues = 0;
1145606d414cSSatish Balay   }
1146bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1147bbb85fb3SSatish Balay }
114857b952d6SSatish Balay 
11494a2ae208SSatish Balay #undef __FUNCT__
11504a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
11516849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
115257b952d6SSatish Balay {
115357b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1154dfbe8321SBarry Smith   PetscErrorCode    ierr;
1155b24ad042SBarry Smith   PetscMPIInt       size = baij->size,rank = baij->rank;
1156521d7252SBarry Smith   PetscInt          bs = mat->bs;
115732077d6dSBarry Smith   PetscTruth        iascii,isdraw;
1158b0a32e0cSBarry Smith   PetscViewer       sviewer;
1159f3ef73ceSBarry Smith   PetscViewerFormat format;
116057b952d6SSatish Balay 
1161d64ed03dSBarry Smith   PetscFunctionBegin;
1162f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
116332077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1164fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
116532077d6dSBarry Smith   if (iascii) {
1166b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1167456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
11684e220ebcSLois Curfman McInnes       MatInfo info;
1169d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1170d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
117177431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
117277431f27SBarry Smith               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1173521d7252SBarry Smith               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1174d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
117577431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1176d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
117777431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1178b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
117957b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
11803a40ed3dSBarry Smith       PetscFunctionReturn(0);
1181fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
118277431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
11833a40ed3dSBarry Smith       PetscFunctionReturn(0);
118404929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
118504929863SHong Zhang       PetscFunctionReturn(0);
118657b952d6SSatish Balay     }
118757b952d6SSatish Balay   }
118857b952d6SSatish Balay 
11890f5bd95cSBarry Smith   if (isdraw) {
1190b0a32e0cSBarry Smith     PetscDraw       draw;
119157b952d6SSatish Balay     PetscTruth isnull;
1192b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1193b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
119457b952d6SSatish Balay   }
119557b952d6SSatish Balay 
119657b952d6SSatish Balay   if (size == 1) {
1197873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
119857b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1199d64ed03dSBarry Smith   } else {
120057b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
120157b952d6SSatish Balay     Mat         A;
120257b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1203b24ad042SBarry Smith     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
12043eda8832SBarry Smith     MatScalar   *a;
120557b952d6SSatish Balay 
1206f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1207f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
120857b952d6SSatish Balay     if (!rank) {
1209f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1210d64ed03dSBarry Smith     } else {
1211f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
121257b952d6SSatish Balay     }
1213f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1214521d7252SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
121552e6d16bSBarry Smith     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
121657b952d6SSatish Balay 
121757b952d6SSatish Balay     /* copy over the A part */
121857b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
121957b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1220b24ad042SBarry Smith     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
122157b952d6SSatish Balay 
122257b952d6SSatish Balay     for (i=0; i<mbs; i++) {
122357b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
122457b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
122557b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
122657b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
122757b952d6SSatish Balay         for (k=0; k<bs; k++) {
122893fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1229cee3aa6bSSatish Balay           col++; a += bs;
123057b952d6SSatish Balay         }
123157b952d6SSatish Balay       }
123257b952d6SSatish Balay     }
123357b952d6SSatish Balay     /* copy over the B part */
123457b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
123557b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
123657b952d6SSatish Balay     for (i=0; i<mbs; i++) {
123757b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
123857b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
123957b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
124057b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
124157b952d6SSatish Balay         for (k=0; k<bs; k++) {
124293fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1243cee3aa6bSSatish Balay           col++; a += bs;
124457b952d6SSatish Balay         }
124557b952d6SSatish Balay       }
124657b952d6SSatish Balay     }
1247606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
12486d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
12496d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
125055843e3eSBarry Smith     /*
125155843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1252b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
125355843e3eSBarry Smith     */
1254b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1255f14a1c24SBarry Smith     if (!rank) {
1256e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
12576831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
125857b952d6SSatish Balay     }
1259b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
126057b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
126157b952d6SSatish Balay   }
12623a40ed3dSBarry Smith   PetscFunctionReturn(0);
126357b952d6SSatish Balay }
126457b952d6SSatish Balay 
12654a2ae208SSatish Balay #undef __FUNCT__
12664a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1267dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
126857b952d6SSatish Balay {
1269dfbe8321SBarry Smith   PetscErrorCode ierr;
127032077d6dSBarry Smith   PetscTruth     iascii,isdraw,issocket,isbinary;
127157b952d6SSatish Balay 
1272d64ed03dSBarry Smith   PetscFunctionBegin;
127332077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1274fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1275b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1276fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
127732077d6dSBarry Smith   if (iascii || isdraw || issocket || isbinary) {
12787b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
12795cd90555SBarry Smith   } else {
128079a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
128157b952d6SSatish Balay   }
12823a40ed3dSBarry Smith   PetscFunctionReturn(0);
128357b952d6SSatish Balay }
128457b952d6SSatish Balay 
12854a2ae208SSatish Balay #undef __FUNCT__
12864a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1287dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
128879bdfe76SSatish Balay {
128979bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1290dfbe8321SBarry Smith   PetscErrorCode ierr;
129179bdfe76SSatish Balay 
1292d64ed03dSBarry Smith   PetscFunctionBegin;
1293aa482453SBarry Smith #if defined(PETSC_USE_LOG)
129477431f27SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
129579bdfe76SSatish Balay #endif
12968798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12978798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1298606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
129979bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
130079bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1301aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
13020f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
130348e59246SSatish Balay #else
1304606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
130548e59246SSatish Balay #endif
1306606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1307606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1308606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1309606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1310606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1311606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
13126fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
13136fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
13146fa18ffdSBarry Smith #endif
1315606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1316901853e0SKris Buschelman 
1317901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1318901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1319901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1320901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1321aac34f13SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1322901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1323901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
13243a40ed3dSBarry Smith   PetscFunctionReturn(0);
132579bdfe76SSatish Balay }
132679bdfe76SSatish Balay 
13274a2ae208SSatish Balay #undef __FUNCT__
13284a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1329dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1330cee3aa6bSSatish Balay {
1331cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332dfbe8321SBarry Smith   PetscErrorCode ierr;
1333b24ad042SBarry Smith   PetscInt       nt;
1334cee3aa6bSSatish Balay 
1335d64ed03dSBarry Smith   PetscFunctionBegin;
1336e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1337273d9f13SBarry Smith   if (nt != A->n) {
133829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
133947b4a8eaSLois Curfman McInnes   }
1340e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1341273d9f13SBarry Smith   if (nt != A->m) {
134229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
134347b4a8eaSLois Curfman McInnes   }
134443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1345f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
134643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1347f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
134843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
13493a40ed3dSBarry Smith   PetscFunctionReturn(0);
1350cee3aa6bSSatish Balay }
1351cee3aa6bSSatish Balay 
13524a2ae208SSatish Balay #undef __FUNCT__
13534a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1354dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1355cee3aa6bSSatish Balay {
1356cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1357dfbe8321SBarry Smith   PetscErrorCode ierr;
1358d64ed03dSBarry Smith 
1359d64ed03dSBarry Smith   PetscFunctionBegin;
136043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1361f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
136243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1363f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
13643a40ed3dSBarry Smith   PetscFunctionReturn(0);
1365cee3aa6bSSatish Balay }
1366cee3aa6bSSatish Balay 
13674a2ae208SSatish Balay #undef __FUNCT__
13684a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1369dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1370cee3aa6bSSatish Balay {
1371cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1372dfbe8321SBarry Smith   PetscErrorCode ierr;
1373a5ff213dSBarry Smith   PetscTruth     merged;
1374cee3aa6bSSatish Balay 
1375d64ed03dSBarry Smith   PetscFunctionBegin;
1376a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1377cee3aa6bSSatish Balay   /* do nondiagonal part */
13787c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1379a5ff213dSBarry Smith   if (!merged) {
1380cee3aa6bSSatish Balay     /* send it on its way */
1381537820f0SBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1382cee3aa6bSSatish Balay     /* do local part */
13837c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1384cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1385a5ff213dSBarry Smith     /* inserted in yy until the next line */
1386639f9d9dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1387a5ff213dSBarry Smith   } else {
1388a5ff213dSBarry Smith     /* do local part */
1389a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1390a5ff213dSBarry Smith     /* send it on its way */
1391a5ff213dSBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1392a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1393a5ff213dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1394a5ff213dSBarry Smith   }
13953a40ed3dSBarry Smith   PetscFunctionReturn(0);
1396cee3aa6bSSatish Balay }
1397cee3aa6bSSatish Balay 
13984a2ae208SSatish Balay #undef __FUNCT__
13994a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1400dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1401cee3aa6bSSatish Balay {
1402cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1403dfbe8321SBarry Smith   PetscErrorCode ierr;
1404cee3aa6bSSatish Balay 
1405d64ed03dSBarry Smith   PetscFunctionBegin;
1406cee3aa6bSSatish Balay   /* do nondiagonal part */
14077c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1408cee3aa6bSSatish Balay   /* send it on its way */
1409537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1410cee3aa6bSSatish Balay   /* do local part */
14117c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1412cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1413cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1414cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1415537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
14163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1417cee3aa6bSSatish Balay }
1418cee3aa6bSSatish Balay 
1419cee3aa6bSSatish Balay /*
1420cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1421cee3aa6bSSatish Balay    diagonal block
1422cee3aa6bSSatish Balay */
14234a2ae208SSatish Balay #undef __FUNCT__
14244a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1425dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1426cee3aa6bSSatish Balay {
1427cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1428dfbe8321SBarry Smith   PetscErrorCode ierr;
1429d64ed03dSBarry Smith 
1430d64ed03dSBarry Smith   PetscFunctionBegin;
1431273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
14323a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
14333a40ed3dSBarry Smith   PetscFunctionReturn(0);
1434cee3aa6bSSatish Balay }
1435cee3aa6bSSatish Balay 
14364a2ae208SSatish Balay #undef __FUNCT__
14374a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1438dfbe8321SBarry Smith PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1439cee3aa6bSSatish Balay {
1440cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1441dfbe8321SBarry Smith   PetscErrorCode ierr;
1442d64ed03dSBarry Smith 
1443d64ed03dSBarry Smith   PetscFunctionBegin;
1444cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1445cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
14463a40ed3dSBarry Smith   PetscFunctionReturn(0);
1447cee3aa6bSSatish Balay }
1448026e39d0SSatish Balay 
14494a2ae208SSatish Balay #undef __FUNCT__
14504a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1451b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1452acdf5bf4SSatish Balay {
1453acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
145487828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
14556849ba73SBarry Smith   PetscErrorCode ierr;
1456521d7252SBarry Smith   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1457b24ad042SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1458b24ad042SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1459acdf5bf4SSatish Balay 
1460d64ed03dSBarry Smith   PetscFunctionBegin;
1461abc0a331SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1462acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1463acdf5bf4SSatish Balay 
1464acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1465acdf5bf4SSatish Balay     /*
1466acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1467acdf5bf4SSatish Balay     */
1468acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1469b24ad042SBarry Smith     PetscInt     max = 1,mbs = mat->mbs,tmp;
1470bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1471acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1472acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1473acdf5bf4SSatish Balay     }
1474b24ad042SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1475b24ad042SBarry Smith     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1476acdf5bf4SSatish Balay   }
1477acdf5bf4SSatish Balay 
147829bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1479d9d09a02SSatish Balay   lrow = row - brstart;
1480acdf5bf4SSatish Balay 
1481acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1482acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1483acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1484f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486acdf5bf4SSatish Balay   nztot = nzA + nzB;
1487acdf5bf4SSatish Balay 
1488acdf5bf4SSatish Balay   cmap  = mat->garray;
1489acdf5bf4SSatish Balay   if (v  || idx) {
1490acdf5bf4SSatish Balay     if (nztot) {
1491acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1492b24ad042SBarry Smith       PetscInt imark = -1;
1493acdf5bf4SSatish Balay       if (v) {
1494acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1495acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1496d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1497acdf5bf4SSatish Balay           else break;
1498acdf5bf4SSatish Balay         }
1499acdf5bf4SSatish Balay         imark = i;
1500acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1501acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1502acdf5bf4SSatish Balay       }
1503acdf5bf4SSatish Balay       if (idx) {
1504acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1505acdf5bf4SSatish Balay         if (imark > -1) {
1506acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1507bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1508acdf5bf4SSatish Balay           }
1509acdf5bf4SSatish Balay         } else {
1510acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1511d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1512d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1513acdf5bf4SSatish Balay             else break;
1514acdf5bf4SSatish Balay           }
1515acdf5bf4SSatish Balay           imark = i;
1516acdf5bf4SSatish Balay         }
1517d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1518d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1519acdf5bf4SSatish Balay       }
1520d64ed03dSBarry Smith     } else {
1521d212a18eSSatish Balay       if (idx) *idx = 0;
1522d212a18eSSatish Balay       if (v)   *v   = 0;
1523d212a18eSSatish Balay     }
1524acdf5bf4SSatish Balay   }
1525acdf5bf4SSatish Balay   *nz = nztot;
1526f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1527f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
15283a40ed3dSBarry Smith   PetscFunctionReturn(0);
1529acdf5bf4SSatish Balay }
1530acdf5bf4SSatish Balay 
15314a2ae208SSatish Balay #undef __FUNCT__
15324a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1533b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1534acdf5bf4SSatish Balay {
1535acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1536d64ed03dSBarry Smith 
1537d64ed03dSBarry Smith   PetscFunctionBegin;
1538abc0a331SBarry Smith   if (!baij->getrowactive) {
153929bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1540acdf5bf4SSatish Balay   }
1541acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
15423a40ed3dSBarry Smith   PetscFunctionReturn(0);
1543acdf5bf4SSatish Balay }
1544acdf5bf4SSatish Balay 
15454a2ae208SSatish Balay #undef __FUNCT__
15464a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1547dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
154858667388SSatish Balay {
154958667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1550dfbe8321SBarry Smith   PetscErrorCode ierr;
1551d64ed03dSBarry Smith 
1552d64ed03dSBarry Smith   PetscFunctionBegin;
155358667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
155458667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
15553a40ed3dSBarry Smith   PetscFunctionReturn(0);
155658667388SSatish Balay }
15570ac07820SSatish Balay 
15584a2ae208SSatish Balay #undef __FUNCT__
15594a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1560dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
15610ac07820SSatish Balay {
15624e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
15634e220ebcSLois Curfman McInnes   Mat            A = a->A,B = a->B;
1564dfbe8321SBarry Smith   PetscErrorCode ierr;
1565329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
15660ac07820SSatish Balay 
1567d64ed03dSBarry Smith   PetscFunctionBegin;
1568521d7252SBarry Smith   info->block_size     = (PetscReal)matin->bs;
15694e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
15700e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1571de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
15724e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
15730e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1574de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15750ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15764e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15774e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15784e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15794e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15804e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15810ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1582d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
15834e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15844e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15854e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15864e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15874e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15880ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1589d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
15904e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15914e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15924e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15934e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15944e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1595d41123aaSBarry Smith   } else {
159677431f27SBarry Smith     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15970ac07820SSatish Balay   }
1598f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1599f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1600f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1601f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
16024e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16034e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16044e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16053a40ed3dSBarry Smith   PetscFunctionReturn(0);
16060ac07820SSatish Balay }
16070ac07820SSatish Balay 
16084a2ae208SSatish Balay #undef __FUNCT__
16094a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1610dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
161158667388SSatish Balay {
161258667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1613dfbe8321SBarry Smith   PetscErrorCode ierr;
161458667388SSatish Balay 
1615d64ed03dSBarry Smith   PetscFunctionBegin;
161612c028f9SKris Buschelman   switch (op) {
161712c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
161812c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
161912c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
162012c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
162112c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
162212c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
162312c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
162498305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
162598305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
162612c028f9SKris Buschelman     break;
162712c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
16287c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
162998305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
163098305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
163112c028f9SKris Buschelman     break;
163212c028f9SKris Buschelman   case MAT_ROWS_SORTED:
163312c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
163412c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
163563ba0a88SBarry Smith     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
163612c028f9SKris Buschelman     break;
163712c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
16387c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
163998305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
164098305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
164112c028f9SKris Buschelman     break;
164212c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
16437c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
164412c028f9SKris Buschelman     break;
164512c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
164629bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
164712c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
16487c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
164912c028f9SKris Buschelman     break;
165077e54ba9SKris Buschelman   case MAT_SYMMETRIC:
165177e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16522188ac68SBarry Smith   case MAT_HERMITIAN:
16532188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
16542188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
16552188ac68SBarry Smith     break;
16569a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
16579a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
16589a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
16599a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
166077e54ba9SKris Buschelman     break;
166112c028f9SKris Buschelman   default:
166229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1663d64ed03dSBarry Smith   }
16643a40ed3dSBarry Smith   PetscFunctionReturn(0);
166558667388SSatish Balay }
166658667388SSatish Balay 
16674a2ae208SSatish Balay #undef __FUNCT__
16684a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1669dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
16700ac07820SSatish Balay {
16710ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
16720ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
16730ac07820SSatish Balay   Mat            B;
1674dfbe8321SBarry Smith   PetscErrorCode ierr;
1675b24ad042SBarry Smith   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1676521d7252SBarry Smith   PetscInt       bs=A->bs,mbs=baij->mbs;
16773eda8832SBarry Smith   MatScalar      *a;
16780ac07820SSatish Balay 
1679d64ed03dSBarry Smith   PetscFunctionBegin;
168029bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1681f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1682f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1683521d7252SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
16840ac07820SSatish Balay 
16850ac07820SSatish Balay   /* copy over the A part */
16860ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16870ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1688b24ad042SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
16890ac07820SSatish Balay 
16900ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16910ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16920ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16930ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16940ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16950ac07820SSatish Balay       for (k=0; k<bs; k++) {
169693fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16970ac07820SSatish Balay         col++; a += bs;
16980ac07820SSatish Balay       }
16990ac07820SSatish Balay     }
17000ac07820SSatish Balay   }
17010ac07820SSatish Balay   /* copy over the B part */
17020ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
17030ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
17040ac07820SSatish Balay   for (i=0; i<mbs; i++) {
17050ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
17060ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
17070ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
17080ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
17090ac07820SSatish Balay       for (k=0; k<bs; k++) {
171093fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
17110ac07820SSatish Balay         col++; a += bs;
17120ac07820SSatish Balay       }
17130ac07820SSatish Balay     }
17140ac07820SSatish Balay   }
1715606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
17160ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17170ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17180ac07820SSatish Balay 
17197c922b88SBarry Smith   if (matout) {
17200ac07820SSatish Balay     *matout = B;
17210ac07820SSatish Balay   } else {
1722273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
17230ac07820SSatish Balay   }
17243a40ed3dSBarry Smith   PetscFunctionReturn(0);
17250ac07820SSatish Balay }
17260e95ebc0SSatish Balay 
17274a2ae208SSatish Balay #undef __FUNCT__
17284a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1729dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
17300e95ebc0SSatish Balay {
173136c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
173236c4a09eSSatish Balay   Mat            a = baij->A,b = baij->B;
1733dfbe8321SBarry Smith   PetscErrorCode ierr;
1734b24ad042SBarry Smith   PetscInt       s1,s2,s3;
17350e95ebc0SSatish Balay 
1736d64ed03dSBarry Smith   PetscFunctionBegin;
173736c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
173836c4a09eSSatish Balay   if (rr) {
173936c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
174029bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
174136c4a09eSSatish Balay     /* Overlap communication with computation. */
174236c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
174336c4a09eSSatish Balay   }
17440e95ebc0SSatish Balay   if (ll) {
17450e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
174629bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1747a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
17480e95ebc0SSatish Balay   }
174936c4a09eSSatish Balay   /* scale  the diagonal block */
175036c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
175136c4a09eSSatish Balay 
175236c4a09eSSatish Balay   if (rr) {
175336c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
175436c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1755a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
175636c4a09eSSatish Balay   }
175736c4a09eSSatish Balay 
17583a40ed3dSBarry Smith   PetscFunctionReturn(0);
17590e95ebc0SSatish Balay }
17600e95ebc0SSatish Balay 
17614a2ae208SSatish Balay #undef __FUNCT__
17624a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1763dfbe8321SBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
17640ac07820SSatish Balay {
17650ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
17666849ba73SBarry Smith   PetscErrorCode ierr;
1767b24ad042SBarry Smith   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1768b24ad042SBarry Smith   PetscInt       i,N,*rows,*owners = l->rowners;
1769b24ad042SBarry Smith   PetscInt       *nprocs,j,idx,nsends,row;
1770b24ad042SBarry Smith   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
17716543fbbaSBarry Smith   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1772521d7252SBarry Smith   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
17730ac07820SSatish Balay   MPI_Comm       comm = A->comm;
17740ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17750ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17760ac07820SSatish Balay   IS             istmp;
17776543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17786543fbbaSBarry Smith   PetscTruth     found = PETSC_FALSE;
17796543fbbaSBarry Smith #endif
17800ac07820SSatish Balay 
1781d64ed03dSBarry Smith   PetscFunctionBegin;
1782f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
17830ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
17840ac07820SSatish Balay 
17850ac07820SSatish Balay   /*  first count number of contributors to each processor */
1786b24ad042SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1787b24ad042SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1788b24ad042SBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
17896543fbbaSBarry Smith   j = 0;
17900ac07820SSatish Balay   for (i=0; i<N; i++) {
17916543fbbaSBarry Smith     if (lastidx > (idx = rows[i])) j = 0;
17926543fbbaSBarry Smith     lastidx = idx;
17936543fbbaSBarry Smith     for (; j<size; j++) {
17940ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17956543fbbaSBarry Smith         nprocs[2*j]++;
17966543fbbaSBarry Smith         nprocs[2*j+1] = 1;
17976543fbbaSBarry Smith         owner[i] = j;
17986543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17996543fbbaSBarry Smith         found = PETSC_TRUE;
18006543fbbaSBarry Smith #endif
18016543fbbaSBarry Smith         break;
18020ac07820SSatish Balay       }
18030ac07820SSatish Balay     }
18046543fbbaSBarry Smith #if defined(PETSC_DEBUG)
180529bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
18066543fbbaSBarry Smith     found = PETSC_FALSE;
18076543fbbaSBarry Smith #endif
18080ac07820SSatish Balay   }
1809c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
18100ac07820SSatish Balay 
18110ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1812c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
18130ac07820SSatish Balay 
18140ac07820SSatish Balay   /* post receives:   */
1815b24ad042SBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1816b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
18170ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1818b24ad042SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
18190ac07820SSatish Balay   }
18200ac07820SSatish Balay 
18210ac07820SSatish Balay   /* do sends:
18220ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
18230ac07820SSatish Balay      the ith processor
18240ac07820SSatish Balay   */
1825b24ad042SBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1826b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1827b24ad042SBarry Smith   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
18280ac07820SSatish Balay   starts[0]  = 0;
1829c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
18300ac07820SSatish Balay   for (i=0; i<N; i++) {
18310ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
18320ac07820SSatish Balay   }
18336831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
18340ac07820SSatish Balay 
18350ac07820SSatish Balay   starts[0] = 0;
1836c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
18370ac07820SSatish Balay   count = 0;
18380ac07820SSatish Balay   for (i=0; i<size; i++) {
1839c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1840b24ad042SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
18410ac07820SSatish Balay     }
18420ac07820SSatish Balay   }
1843606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
18440ac07820SSatish Balay 
18450ac07820SSatish Balay   base = owners[rank]*bs;
18460ac07820SSatish Balay 
18470ac07820SSatish Balay   /*  wait on receives */
1848b24ad042SBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
18490ac07820SSatish Balay   source = lens + nrecvs;
18500ac07820SSatish Balay   count  = nrecvs; slen = 0;
18510ac07820SSatish Balay   while (count) {
1852ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
18530ac07820SSatish Balay     /* unpack receives into our local space */
1854b24ad042SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
18550ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
18560ac07820SSatish Balay     lens[imdex]    = n;
18570ac07820SSatish Balay     slen          += n;
18580ac07820SSatish Balay     count--;
18590ac07820SSatish Balay   }
1860606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
18610ac07820SSatish Balay 
18620ac07820SSatish Balay   /* move the data into the send scatter */
1863b24ad042SBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
18640ac07820SSatish Balay   count = 0;
18650ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
18660ac07820SSatish Balay     values = rvalues + i*nmax;
18670ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
18680ac07820SSatish Balay       lrows[count++] = values[j] - base;
18690ac07820SSatish Balay     }
18700ac07820SSatish Balay   }
1871606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1872606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1873606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1874606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
18750ac07820SSatish Balay 
18760ac07820SSatish Balay   /* actually zap the local rows */
1877029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
187852e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1879a07cd24cSSatish Balay 
188072dacd9aSBarry Smith   /*
188172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
188272dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
188372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
188472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
188572dacd9aSBarry Smith 
188672dacd9aSBarry Smith        Contributed by: Mathew Knepley
188772dacd9aSBarry Smith   */
18889c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
18896fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
18909c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
18916fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
18929c957beeSSatish Balay   } else if (diag) {
18936fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1894fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
189529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1896fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18976525c446SSatish Balay     }
1898a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1899a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
19003f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1901a07cd24cSSatish Balay     }
1902a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19049c957beeSSatish Balay   } else {
19056fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1906a07cd24cSSatish Balay   }
19079c957beeSSatish Balay 
19089c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1909606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1910a07cd24cSSatish Balay 
19110ac07820SSatish Balay   /* wait on sends */
19120ac07820SSatish Balay   if (nsends) {
191382502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1914ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1915606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
19160ac07820SSatish Balay   }
1917606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1918606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
19190ac07820SSatish Balay 
19203a40ed3dSBarry Smith   PetscFunctionReturn(0);
19210ac07820SSatish Balay }
192272dacd9aSBarry Smith 
19234a2ae208SSatish Balay #undef __FUNCT__
19244a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1925dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1926ba4ca20aSSatish Balay {
1927ba4ca20aSSatish Balay   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
192825fdafccSSatish Balay   MPI_Comm          comm = A->comm;
1929b24ad042SBarry Smith   static PetscTruth called = PETSC_FALSE;
1930dfbe8321SBarry Smith   PetscErrorCode    ierr;
1931ba4ca20aSSatish Balay 
1932d64ed03dSBarry Smith   PetscFunctionBegin;
19333a40ed3dSBarry Smith   if (!a->rank) {
19343a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
193525fdafccSSatish Balay   }
1936b24ad042SBarry Smith   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1937d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1938d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
19393a40ed3dSBarry Smith   PetscFunctionReturn(0);
1940ba4ca20aSSatish Balay }
19410ac07820SSatish Balay 
19424a2ae208SSatish Balay #undef __FUNCT__
19434a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1944dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1945bb5a7306SBarry Smith {
1946bb5a7306SBarry Smith   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1947dfbe8321SBarry Smith   PetscErrorCode ierr;
1948d64ed03dSBarry Smith 
1949d64ed03dSBarry Smith   PetscFunctionBegin;
1950bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
19513a40ed3dSBarry Smith   PetscFunctionReturn(0);
1952bb5a7306SBarry Smith }
1953bb5a7306SBarry Smith 
19546849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
19550ac07820SSatish Balay 
19564a2ae208SSatish Balay #undef __FUNCT__
19574a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1958dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
19597fc3c18eSBarry Smith {
19607fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
19617fc3c18eSBarry Smith   Mat            a,b,c,d;
19627fc3c18eSBarry Smith   PetscTruth     flg;
1963dfbe8321SBarry Smith   PetscErrorCode ierr;
19647fc3c18eSBarry Smith 
19657fc3c18eSBarry Smith   PetscFunctionBegin;
19667fc3c18eSBarry Smith   a = matA->A; b = matA->B;
19677fc3c18eSBarry Smith   c = matB->A; d = matB->B;
19687fc3c18eSBarry Smith 
19697fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1970abc0a331SBarry Smith   if (flg) {
19717fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
19727fc3c18eSBarry Smith   }
19737fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
19747fc3c18eSBarry Smith   PetscFunctionReturn(0);
19757fc3c18eSBarry Smith }
19767fc3c18eSBarry Smith 
1977273d9f13SBarry Smith 
19784a2ae208SSatish Balay #undef __FUNCT__
19794a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1980dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1981273d9f13SBarry Smith {
1982dfbe8321SBarry Smith   PetscErrorCode ierr;
1983273d9f13SBarry Smith 
1984273d9f13SBarry Smith   PetscFunctionBegin;
1985273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1986273d9f13SBarry Smith   PetscFunctionReturn(0);
1987273d9f13SBarry Smith }
1988273d9f13SBarry Smith 
198979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1990cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1991cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1992cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1993cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1994cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
199597304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
19967c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
19977c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1998cc2dc46cSBarry Smith        0,
1999cc2dc46cSBarry Smith        0,
2000cc2dc46cSBarry Smith        0,
200197304618SKris Buschelman /*10*/ 0,
2002cc2dc46cSBarry Smith        0,
2003cc2dc46cSBarry Smith        0,
2004cc2dc46cSBarry Smith        0,
2005cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
200697304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
20077fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
2008cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
2009cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
2010cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
201197304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
2012cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
2013cc2dc46cSBarry Smith        0,
2014cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
2015cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
201697304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
2017cc2dc46cSBarry Smith        0,
2018cc2dc46cSBarry Smith        0,
2019cc2dc46cSBarry Smith        0,
2020cc2dc46cSBarry Smith        0,
202197304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
2022273d9f13SBarry Smith        0,
2023cc2dc46cSBarry Smith        0,
2024cc2dc46cSBarry Smith        0,
2025cc2dc46cSBarry Smith        0,
202697304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
2027cc2dc46cSBarry Smith        0,
2028cc2dc46cSBarry Smith        0,
2029cc2dc46cSBarry Smith        0,
2030cc2dc46cSBarry Smith        0,
203197304618SKris Buschelman /*40*/ 0,
2032cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
2033cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
2034cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
2035cc2dc46cSBarry Smith        0,
203697304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
2037cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
2038cc2dc46cSBarry Smith        0,
2039cc2dc46cSBarry Smith        0,
2040cc2dc46cSBarry Smith        0,
2041521d7252SBarry Smith /*50*/ 0,
2042cc2dc46cSBarry Smith        0,
2043cc2dc46cSBarry Smith        0,
2044cc2dc46cSBarry Smith        0,
2045cc2dc46cSBarry Smith        0,
204697304618SKris Buschelman /*55*/ 0,
2047cc2dc46cSBarry Smith        0,
2048cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
2049cc2dc46cSBarry Smith        0,
2050cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
205197304618SKris Buschelman /*60*/ 0,
2052f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
2053f14a1c24SBarry Smith        MatView_MPIBAIJ,
20548a124369SBarry Smith        MatGetPetscMaps_Petsc,
20557843d17aSBarry Smith        0,
205697304618SKris Buschelman /*65*/ 0,
20577843d17aSBarry Smith        0,
20587843d17aSBarry Smith        0,
20597843d17aSBarry Smith        0,
20607843d17aSBarry Smith        0,
206197304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
20627843d17aSBarry Smith        0,
206397304618SKris Buschelman        0,
206497304618SKris Buschelman        0,
206597304618SKris Buschelman        0,
206697304618SKris Buschelman /*75*/ 0,
206797304618SKris Buschelman        0,
206897304618SKris Buschelman        0,
206997304618SKris Buschelman        0,
207097304618SKris Buschelman        0,
207197304618SKris Buschelman /*80*/ 0,
207297304618SKris Buschelman        0,
207397304618SKris Buschelman        0,
207497304618SKris Buschelman        0,
2075865e5f61SKris Buschelman        MatLoad_MPIBAIJ,
2076865e5f61SKris Buschelman /*85*/ 0,
2077865e5f61SKris Buschelman        0,
2078865e5f61SKris Buschelman        0,
2079865e5f61SKris Buschelman        0,
2080865e5f61SKris Buschelman        0,
2081865e5f61SKris Buschelman /*90*/ 0,
2082865e5f61SKris Buschelman        0,
2083865e5f61SKris Buschelman        0,
2084865e5f61SKris Buschelman        0,
2085865e5f61SKris Buschelman        0,
2086865e5f61SKris Buschelman /*95*/ 0,
2087865e5f61SKris Buschelman        0,
2088865e5f61SKris Buschelman        0,
2089865e5f61SKris Buschelman        0};
209079bdfe76SSatish Balay 
20915ef9f2a5SBarry Smith 
2092e18c124aSSatish Balay EXTERN_C_BEGIN
20934a2ae208SSatish Balay #undef __FUNCT__
20944a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2095*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20965ef9f2a5SBarry Smith {
20975ef9f2a5SBarry Smith   PetscFunctionBegin;
20985ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20995ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
21005ef9f2a5SBarry Smith   PetscFunctionReturn(0);
21015ef9f2a5SBarry Smith }
2102e18c124aSSatish Balay EXTERN_C_END
210379bdfe76SSatish Balay 
2104273d9f13SBarry Smith EXTERN_C_BEGIN
2105*be1d678aSKris Buschelman extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2106d94109b8SHong Zhang EXTERN_C_END
2107d94109b8SHong Zhang 
2108aac34f13SBarry Smith #undef __FUNCT__
2109aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2110aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2111aac34f13SBarry Smith {
2112aac34f13SBarry Smith   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2113aac34f13SBarry Smith   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2114aac34f13SBarry Smith   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2115aac34f13SBarry Smith   const PetscInt *JJ;
2116aac34f13SBarry Smith   PetscScalar    *values;
2117aac34f13SBarry Smith   PetscErrorCode ierr;
2118aac34f13SBarry Smith 
2119aac34f13SBarry Smith   PetscFunctionBegin;
2120aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2121aac34f13SBarry Smith   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2122aac34f13SBarry Smith #endif
2123aac34f13SBarry Smith   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2124aac34f13SBarry Smith   o_nnz = d_nnz + m;
2125aac34f13SBarry Smith 
2126aac34f13SBarry Smith   for (i=0; i<m; i++) {
2127aac34f13SBarry Smith     nnz     = I[i+1]- I[i];
2128aac34f13SBarry Smith     JJ      = J + I[i];
2129aac34f13SBarry Smith     nnz_max = PetscMax(nnz_max,nnz);
2130aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2131aac34f13SBarry Smith     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2132aac34f13SBarry Smith #endif
2133aac34f13SBarry Smith     for (j=0; j<nnz; j++) {
2134aac34f13SBarry Smith       if (*JJ >= cstart) break;
2135aac34f13SBarry Smith       JJ++;
2136aac34f13SBarry Smith     }
2137aac34f13SBarry Smith     d = 0;
2138aac34f13SBarry Smith     for (; j<nnz; j++) {
2139aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2140aac34f13SBarry Smith       d++;
2141aac34f13SBarry Smith     }
2142aac34f13SBarry Smith     d_nnz[i] = d;
2143aac34f13SBarry Smith     o_nnz[i] = nnz - d;
2144aac34f13SBarry Smith   }
2145aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2146aac34f13SBarry Smith   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2147aac34f13SBarry Smith 
2148aac34f13SBarry Smith   if (v) values = (PetscScalar*)v;
2149aac34f13SBarry Smith   else {
2150aac34f13SBarry Smith     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2151aac34f13SBarry Smith     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2152aac34f13SBarry Smith   }
2153aac34f13SBarry Smith 
2154aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2155aac34f13SBarry Smith   for (i=0; i<m; i++) {
2156aac34f13SBarry Smith     ii   = i + rstart;
2157aac34f13SBarry Smith     nnz  = I[i+1]- I[i];
2158aac34f13SBarry Smith     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2159aac34f13SBarry Smith   }
2160aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2163aac34f13SBarry Smith 
2164aac34f13SBarry Smith   if (!v) {
2165aac34f13SBarry Smith     ierr = PetscFree(values);CHKERRQ(ierr);
2166aac34f13SBarry Smith   }
2167aac34f13SBarry Smith   PetscFunctionReturn(0);
2168aac34f13SBarry Smith }
2169aac34f13SBarry Smith 
2170aac34f13SBarry Smith #undef __FUNCT__
2171aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2172aac34f13SBarry Smith /*@C
2173aac34f13SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2174aac34f13SBarry Smith    (the default parallel PETSc format).
2175aac34f13SBarry Smith 
2176aac34f13SBarry Smith    Collective on MPI_Comm
2177aac34f13SBarry Smith 
2178aac34f13SBarry Smith    Input Parameters:
2179aac34f13SBarry Smith +  A - the matrix
2180aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2181aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2182aac34f13SBarry Smith -  v - optional values in the matrix
2183aac34f13SBarry Smith 
2184aac34f13SBarry Smith    Level: developer
2185aac34f13SBarry Smith 
2186aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2187aac34f13SBarry Smith 
2188aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2189aac34f13SBarry Smith @*/
2190*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2191aac34f13SBarry Smith {
2192aac34f13SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2193aac34f13SBarry Smith 
2194aac34f13SBarry Smith   PetscFunctionBegin;
2195aac34f13SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2196aac34f13SBarry Smith   if (f) {
2197aac34f13SBarry Smith     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2198aac34f13SBarry Smith   }
2199aac34f13SBarry Smith   PetscFunctionReturn(0);
2200aac34f13SBarry Smith }
2201aac34f13SBarry Smith 
2202d94109b8SHong Zhang EXTERN_C_BEGIN
22034a2ae208SSatish Balay #undef __FUNCT__
2204a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2205*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2206a23d5eceSKris Buschelman {
2207a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2208dfbe8321SBarry Smith   PetscErrorCode ierr;
2209b24ad042SBarry Smith   PetscInt       i;
2210a23d5eceSKris Buschelman 
2211a23d5eceSKris Buschelman   PetscFunctionBegin;
2212a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2213a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2214a23d5eceSKris Buschelman 
2215a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2216a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2217a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
221877431f27SBarry Smith   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
221977431f27SBarry Smith   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2220a23d5eceSKris Buschelman   if (d_nnz) {
2221a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
222277431f27SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2223a23d5eceSKris Buschelman     }
2224a23d5eceSKris Buschelman   }
2225a23d5eceSKris Buschelman   if (o_nnz) {
2226a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
222777431f27SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2228a23d5eceSKris Buschelman     }
2229a23d5eceSKris Buschelman   }
2230a23d5eceSKris Buschelman 
2231a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2232a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2233a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2234a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2235a23d5eceSKris Buschelman 
2236a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2237521d7252SBarry Smith   B->bs  = bs;
2238a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2239a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2240a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2241a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2242a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2243a23d5eceSKris Buschelman 
2244b24ad042SBarry Smith   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2245a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2246a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2247a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2248a23d5eceSKris Buschelman   }
2249a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2250a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2251a23d5eceSKris Buschelman 
2252b24ad042SBarry Smith   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2253a23d5eceSKris Buschelman   b->cowners[0] = 0;
2254a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2255a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2256a23d5eceSKris Buschelman   }
2257a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2258a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2259a23d5eceSKris Buschelman 
2260a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2261a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2262a23d5eceSKris Buschelman   }
2263a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2264a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2265a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2266a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2267a23d5eceSKris Buschelman 
22689c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
22699c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2270c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
227152e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
22729c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
22739c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2274c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
227552e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2276c60e587dSKris Buschelman 
2277a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2278a23d5eceSKris Buschelman 
2279a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2280a23d5eceSKris Buschelman }
2281a23d5eceSKris Buschelman EXTERN_C_END
2282a23d5eceSKris Buschelman 
2283a23d5eceSKris Buschelman EXTERN_C_BEGIN
2284*be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2285*be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
228692b32695SKris Buschelman EXTERN_C_END
22875bf65638SKris Buschelman 
22880bad9183SKris Buschelman /*MC
2289fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
22900bad9183SKris Buschelman 
22910bad9183SKris Buschelman    Options Database Keys:
22920bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
22930bad9183SKris Buschelman 
22940bad9183SKris Buschelman   Level: beginner
22950bad9183SKris Buschelman 
22960bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
22970bad9183SKris Buschelman M*/
22980bad9183SKris Buschelman 
229992b32695SKris Buschelman EXTERN_C_BEGIN
2300a23d5eceSKris Buschelman #undef __FUNCT__
23014a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2302*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2303273d9f13SBarry Smith {
2304273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2305dfbe8321SBarry Smith   PetscErrorCode ierr;
2306273d9f13SBarry Smith   PetscTruth     flg;
2307273d9f13SBarry Smith 
2308273d9f13SBarry Smith   PetscFunctionBegin;
230982502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
231082502324SSatish Balay   B->data = (void*)b;
231182502324SSatish Balay 
2312273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2313273d9f13SBarry Smith   B->mapping    = 0;
2314273d9f13SBarry Smith   B->factor     = 0;
2315273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2316273d9f13SBarry Smith 
2317273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2318273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2319273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2320273d9f13SBarry Smith 
2321273d9f13SBarry Smith   /* build local table of row and column ownerships */
2322b24ad042SBarry Smith   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
232352e6d16bSBarry Smith   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2324273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2325273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2326273d9f13SBarry Smith 
2327273d9f13SBarry Smith   /* build cache for off array entries formed */
2328273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2329273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2330273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2331273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2332273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2333273d9f13SBarry Smith 
2334cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2335273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
233664a35ccbSBarry Smith   b->setvalueslen     = 0;
2337273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2338273d9f13SBarry Smith #endif
2339273d9f13SBarry Smith 
2340273d9f13SBarry Smith   /* stuff used in block assembly */
2341273d9f13SBarry Smith   b->barray       = 0;
2342273d9f13SBarry Smith 
2343273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2344273d9f13SBarry Smith   b->lvec         = 0;
2345273d9f13SBarry Smith   b->Mvctx        = 0;
2346273d9f13SBarry Smith 
2347273d9f13SBarry Smith   /* stuff for MatGetRow() */
2348273d9f13SBarry Smith   b->rowindices   = 0;
2349273d9f13SBarry Smith   b->rowvalues    = 0;
2350273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2351273d9f13SBarry Smith 
2352273d9f13SBarry Smith   /* hash table stuff */
2353273d9f13SBarry Smith   b->ht           = 0;
2354273d9f13SBarry Smith   b->hd           = 0;
2355273d9f13SBarry Smith   b->ht_size      = 0;
2356273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2357273d9f13SBarry Smith   b->ht_fact      = 0;
2358273d9f13SBarry Smith   b->ht_total_ct  = 0;
2359273d9f13SBarry Smith   b->ht_insert_ct = 0;
2360273d9f13SBarry Smith 
2361b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2362273d9f13SBarry Smith   if (flg) {
2363f6275e2eSBarry Smith     PetscReal fact = 1.39;
2364273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
236587828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2366273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2367273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
236863ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2369273d9f13SBarry Smith   }
2370273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2371273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2372273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2373273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2374273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2375273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2376273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2377273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2378273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2379a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2380a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2381a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2382aac34f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2383aac34f13SBarry Smith 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2384aac34f13SBarry Smith 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
238592b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
238692b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
238792b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
23885bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
23895bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
23905bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2391273d9f13SBarry Smith   PetscFunctionReturn(0);
2392273d9f13SBarry Smith }
2393273d9f13SBarry Smith EXTERN_C_END
2394273d9f13SBarry Smith 
2395209238afSKris Buschelman /*MC
2396002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2397209238afSKris Buschelman 
2398209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2399209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2400209238afSKris Buschelman 
2401209238afSKris Buschelman    Options Database Keys:
2402209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2403209238afSKris Buschelman 
2404209238afSKris Buschelman   Level: beginner
2405209238afSKris Buschelman 
2406aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2407209238afSKris Buschelman M*/
2408209238afSKris Buschelman 
2409209238afSKris Buschelman EXTERN_C_BEGIN
2410209238afSKris Buschelman #undef __FUNCT__
2411209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2412*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2413dfbe8321SBarry Smith {
24146849ba73SBarry Smith   PetscErrorCode ierr;
2415b24ad042SBarry Smith   PetscMPIInt    size;
2416209238afSKris Buschelman 
2417209238afSKris Buschelman   PetscFunctionBegin;
2418209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2419209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2420209238afSKris Buschelman   if (size == 1) {
2421209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2422209238afSKris Buschelman   } else {
2423209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2424209238afSKris Buschelman   }
2425209238afSKris Buschelman   PetscFunctionReturn(0);
2426209238afSKris Buschelman }
2427209238afSKris Buschelman EXTERN_C_END
2428209238afSKris Buschelman 
24294a2ae208SSatish Balay #undef __FUNCT__
24304a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2431273d9f13SBarry Smith /*@C
2432aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2433273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2434273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2435273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2436273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2437273d9f13SBarry Smith 
2438273d9f13SBarry Smith    Collective on Mat
2439273d9f13SBarry Smith 
2440273d9f13SBarry Smith    Input Parameters:
2441273d9f13SBarry Smith +  A - the matrix
2442273d9f13SBarry Smith .  bs   - size of blockk
2443273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2444273d9f13SBarry Smith            submatrix  (same for all local rows)
2445273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2446273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2447273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2448273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2449273d9f13SBarry Smith            submatrix (same for all local rows).
2450273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2451273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2452273d9f13SBarry Smith            each block row) or PETSC_NULL.
2453273d9f13SBarry Smith 
245449a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
2455273d9f13SBarry Smith 
2456273d9f13SBarry Smith    Options Database Keys:
2457273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2458273d9f13SBarry Smith                      block calculations (much slower)
2459273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2460273d9f13SBarry Smith 
2461273d9f13SBarry Smith    Notes:
2462273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2463273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2464273d9f13SBarry Smith 
2465273d9f13SBarry Smith    Storage Information:
2466273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2467273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2468273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2469273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2470273d9f13SBarry Smith 
2471273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2472273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2473273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2474273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2475273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2476273d9f13SBarry Smith 
2477273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2478273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2479273d9f13SBarry Smith 
2480273d9f13SBarry Smith .vb
2481273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2482273d9f13SBarry Smith           -------------------
2483273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2484273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2485273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2486273d9f13SBarry Smith           -------------------
2487273d9f13SBarry Smith .ve
2488273d9f13SBarry Smith 
2489273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2490273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2491273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2492273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2493273d9f13SBarry Smith 
2494273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2495273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2496273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2497273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2498273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2499273d9f13SBarry Smith    matrices.
2500273d9f13SBarry Smith 
2501273d9f13SBarry Smith    Level: intermediate
2502273d9f13SBarry Smith 
2503273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2504273d9f13SBarry Smith 
2505aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2506273d9f13SBarry Smith @*/
2507*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2508273d9f13SBarry Smith {
2509b24ad042SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2510273d9f13SBarry Smith 
2511273d9f13SBarry Smith   PetscFunctionBegin;
2512a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2513a23d5eceSKris Buschelman   if (f) {
2514a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2515273d9f13SBarry Smith   }
2516273d9f13SBarry Smith   PetscFunctionReturn(0);
2517273d9f13SBarry Smith }
2518273d9f13SBarry Smith 
25194a2ae208SSatish Balay #undef __FUNCT__
25204a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
252179bdfe76SSatish Balay /*@C
252279bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
252379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
252479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
252579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
252679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
252779bdfe76SSatish Balay 
2528db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2529db81eaa0SLois Curfman McInnes 
253079bdfe76SSatish Balay    Input Parameters:
2531db81eaa0SLois Curfman McInnes +  comm - MPI communicator
253279bdfe76SSatish Balay .  bs   - size of blockk
253379bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
253492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
253692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
253792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2539be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2540be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
254147a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
254279bdfe76SSatish Balay            submatrix  (same for all local rows)
254347a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
254492e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2545db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
254647a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
254779bdfe76SSatish Balay            submatrix (same for all local rows).
254847a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
254992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
255092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
255179bdfe76SSatish Balay 
255279bdfe76SSatish Balay    Output Parameter:
255379bdfe76SSatish Balay .  A - the matrix
255479bdfe76SSatish Balay 
2555db81eaa0SLois Curfman McInnes    Options Database Keys:
2556db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2557db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2558db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
25593ffaccefSLois Curfman McInnes 
2560b259b22eSLois Curfman McInnes    Notes:
256149a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
256249a6f317SBarry Smith 
256347a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
256447a75d0bSBarry Smith 
256579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
256679bdfe76SSatish Balay    (possibly both).
256779bdfe76SSatish Balay 
2568be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2569be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2570be79a94dSBarry Smith 
257179bdfe76SSatish Balay    Storage Information:
257279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
257379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
257479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
257579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
257679bdfe76SSatish Balay 
257779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
257879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
257979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
258079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
258179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
258279bdfe76SSatish Balay 
258379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
258479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
258579bdfe76SSatish Balay 
2586db81eaa0SLois Curfman McInnes .vb
2587db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2588db81eaa0SLois Curfman McInnes           -------------------
2589db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2590db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2591db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2592db81eaa0SLois Curfman McInnes           -------------------
2593db81eaa0SLois Curfman McInnes .ve
259479bdfe76SSatish Balay 
259579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
259679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
259779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
259857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
259979bdfe76SSatish Balay 
2600d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2601d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
260279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
260392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
260492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
26056da5968aSLois Curfman McInnes    matrices.
260679bdfe76SSatish Balay 
2607027ccd11SLois Curfman McInnes    Level: intermediate
2608027ccd11SLois Curfman McInnes 
260992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
261079bdfe76SSatish Balay 
2611aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
261279bdfe76SSatish Balay @*/
2613*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
261479bdfe76SSatish Balay {
26156849ba73SBarry Smith   PetscErrorCode ierr;
2616b24ad042SBarry Smith   PetscMPIInt    size;
261779bdfe76SSatish Balay 
2618d64ed03dSBarry Smith   PetscFunctionBegin;
2619273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2620d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621273d9f13SBarry Smith   if (size > 1) {
2622273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2623273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2624273d9f13SBarry Smith   } else {
2625273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2626273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
26273914022bSBarry Smith   }
26283a40ed3dSBarry Smith   PetscFunctionReturn(0);
262979bdfe76SSatish Balay }
2630026e39d0SSatish Balay 
26314a2ae208SSatish Balay #undef __FUNCT__
26324a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
26336849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
26340ac07820SSatish Balay {
26350ac07820SSatish Balay   Mat            mat;
26360ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2637dfbe8321SBarry Smith   PetscErrorCode ierr;
2638b24ad042SBarry Smith   PetscInt       len=0;
26390ac07820SSatish Balay 
2640d64ed03dSBarry Smith   PetscFunctionBegin;
26410ac07820SSatish Balay   *newmat       = 0;
2642273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2643be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
26441d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
26457fff6886SHong Zhang 
26464beb1cfeSHong Zhang   mat->factor       = matin->factor;
2647273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
26480ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
26497fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
26507fff6886SHong Zhang 
2651273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
2652521d7252SBarry Smith   mat->bs  = matin->bs;
26530ac07820SSatish Balay   a->bs2   = oldmat->bs2;
26540ac07820SSatish Balay   a->mbs   = oldmat->mbs;
26550ac07820SSatish Balay   a->nbs   = oldmat->nbs;
26560ac07820SSatish Balay   a->Mbs   = oldmat->Mbs;
26570ac07820SSatish Balay   a->Nbs   = oldmat->Nbs;
26580ac07820SSatish Balay 
26590ac07820SSatish Balay   a->rstart       = oldmat->rstart;
26600ac07820SSatish Balay   a->rend         = oldmat->rend;
26610ac07820SSatish Balay   a->cstart       = oldmat->cstart;
26620ac07820SSatish Balay   a->cend         = oldmat->cend;
26630ac07820SSatish Balay   a->size         = oldmat->size;
26640ac07820SSatish Balay   a->rank         = oldmat->rank;
2665aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2666aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2667aef5e8e0SSatish Balay   a->rowindices   = 0;
26680ac07820SSatish Balay   a->rowvalues    = 0;
26690ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
267030793edcSSatish Balay   a->barray       = 0;
26713123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
26723123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
26733123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
26743123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
26750ac07820SSatish Balay 
2676133cdb44SSatish Balay   /* hash table stuff */
2677133cdb44SSatish Balay   a->ht           = 0;
2678133cdb44SSatish Balay   a->hd           = 0;
2679133cdb44SSatish Balay   a->ht_size      = 0;
2680133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
268125fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2682133cdb44SSatish Balay   a->ht_total_ct  = 0;
2683133cdb44SSatish Balay   a->ht_insert_ct = 0;
2684133cdb44SSatish Balay 
2685b24ad042SBarry Smith   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
26868798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2687521d7252SBarry Smith   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
26880ac07820SSatish Balay   if (oldmat->colmap) {
2689aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
26900f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
269148e59246SSatish Balay #else
2692b24ad042SBarry Smith   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
269352e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2694b24ad042SBarry Smith   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
269548e59246SSatish Balay #endif
26960ac07820SSatish Balay   } else a->colmap = 0;
26974beb1cfeSHong Zhang 
26980ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2699b24ad042SBarry Smith     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
270052e6d16bSBarry Smith     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2701b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
27020ac07820SSatish Balay   } else a->garray = 0;
27030ac07820SSatish Balay 
27040ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
270552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
27060ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
270752e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
27087fff6886SHong Zhang 
27092e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
271052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
27112e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
271252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2713b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
27140ac07820SSatish Balay   *newmat = mat;
27154beb1cfeSHong Zhang 
27163a40ed3dSBarry Smith   PetscFunctionReturn(0);
27170ac07820SSatish Balay }
271857b952d6SSatish Balay 
2719e090d566SSatish Balay #include "petscsys.h"
272057b952d6SSatish Balay 
27214a2ae208SSatish Balay #undef __FUNCT__
27224a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2723dfbe8321SBarry Smith PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
272457b952d6SSatish Balay {
272557b952d6SSatish Balay   Mat            A;
27266849ba73SBarry Smith   PetscErrorCode ierr;
2727b24ad042SBarry Smith   int            fd;
2728b24ad042SBarry Smith   PetscInt       i,nz,j,rstart,rend;
272987828ca2SBarry Smith   PetscScalar    *vals,*buf;
273057b952d6SSatish Balay   MPI_Comm       comm = ((PetscObject)viewer)->comm;
273157b952d6SSatish Balay   MPI_Status     status;
2732b24ad042SBarry Smith   PetscMPIInt    rank,size,maxnz;
2733b24ad042SBarry Smith   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2734dc231df0SBarry Smith   PetscInt       *locrowlens,*procsnz = 0,*browners;
2735167e7480SBarry Smith   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2736dc231df0SBarry Smith   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2737b24ad042SBarry Smith   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2738dc231df0SBarry Smith   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
273957b952d6SSatish Balay 
2740d64ed03dSBarry Smith   PetscFunctionBegin;
2741b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
274257b952d6SSatish Balay 
2743d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2744d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
274557b952d6SSatish Balay   if (!rank) {
2746b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2747e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2748552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
27496c5fab8fSBarry Smith   }
2750d64ed03dSBarry Smith 
2751b24ad042SBarry Smith   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
275257b952d6SSatish Balay   M = header[1]; N = header[2];
275357b952d6SSatish Balay 
275429bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
275557b952d6SSatish Balay 
275657b952d6SSatish Balay   /*
275757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
275857b952d6SSatish Balay      divisible by the blocksize
275957b952d6SSatish Balay   */
276057b952d6SSatish Balay   Mbs        = M/bs;
2761dc231df0SBarry Smith   extra_rows = bs - M + bs*Mbs;
276257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
276357b952d6SSatish Balay   else                  Mbs++;
276457b952d6SSatish Balay   if (extra_rows && !rank) {
276563ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
276657b952d6SSatish Balay   }
2767537820f0SBarry Smith 
276857b952d6SSatish Balay   /* determine ownership of all rows */
276957b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
277057b952d6SSatish Balay   m          = mbs*bs;
2771dc231df0SBarry Smith   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2772b24ad042SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2773167e7480SBarry Smith 
2774167e7480SBarry Smith   /* process 0 needs enough room for process with most rows */
2775167e7480SBarry Smith   if (!rank) {
2776167e7480SBarry Smith     mmax = rowners[1];
2777167e7480SBarry Smith     for (i=2; i<size; i++) {
2778167e7480SBarry Smith       mmax = PetscMax(mmax,rowners[i]);
2779167e7480SBarry Smith     }
2780ca02efcfSSatish Balay     mmax*=bs;
2781167e7480SBarry Smith   } else mmax = m;
2782167e7480SBarry Smith 
278357b952d6SSatish Balay   rowners[0] = 0;
2784cee3aa6bSSatish Balay   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2785cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
278657b952d6SSatish Balay   rstart = rowners[rank];
278757b952d6SSatish Balay   rend   = rowners[rank+1];
278857b952d6SSatish Balay 
278957b952d6SSatish Balay   /* distribute row lengths to all processors */
279019c38ff2SBarry Smith   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
279157b952d6SSatish Balay   if (!rank) {
2792dc231df0SBarry Smith     mend = m;
2793dc231df0SBarry Smith     if (size == 1) mend = mend - extra_rows;
2794dc231df0SBarry Smith     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2795dc231df0SBarry Smith     for (j=mend; j<m; j++) locrowlens[j] = 1;
2796dc231df0SBarry Smith     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2797b24ad042SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2798b24ad042SBarry Smith     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2799dc231df0SBarry Smith     for (j=0; j<m; j++) {
2800dc231df0SBarry Smith       procsnz[0] += locrowlens[j];
2801dc231df0SBarry Smith     }
2802dc231df0SBarry Smith     for (i=1; i<size; i++) {
2803dc231df0SBarry Smith       mend = browners[i+1] - browners[i];
2804dc231df0SBarry Smith       if (i == size-1) mend = mend - extra_rows;
2805dc231df0SBarry Smith       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2806dc231df0SBarry Smith       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2807dc231df0SBarry Smith       /* calculate the number of nonzeros on each processor */
2808dc231df0SBarry Smith       for (j=0; j<browners[i+1]-browners[i]; j++) {
280957b952d6SSatish Balay         procsnz[i] += rowlengths[j];
281057b952d6SSatish Balay       }
2811dc231df0SBarry Smith       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281257b952d6SSatish Balay     }
2813606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2814dc231df0SBarry Smith   } else {
2815dc231df0SBarry Smith     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2816dc231df0SBarry Smith   }
281757b952d6SSatish Balay 
2818dc231df0SBarry Smith   if (!rank) {
281957b952d6SSatish Balay     /* determine max buffer needed and allocate it */
28208a8e0b3aSBarry Smith     maxnz = procsnz[0];
2821cdc0ba36SBarry Smith     for (i=1; i<size; i++) {
282257b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
282357b952d6SSatish Balay     }
2824b24ad042SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
282557b952d6SSatish Balay 
282657b952d6SSatish Balay     /* read in my part of the matrix column indices  */
282757b952d6SSatish Balay     nz     = procsnz[0];
282819c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
282957b952d6SSatish Balay     mycols = ibuf;
2830cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2831e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2832cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2833cee3aa6bSSatish Balay 
283457b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
283557b952d6SSatish Balay     for (i=1; i<size-1; i++) {
283657b952d6SSatish Balay       nz   = procsnz[i];
2837e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2838b24ad042SBarry Smith       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
283957b952d6SSatish Balay     }
284057b952d6SSatish Balay     /* read in the stuff for the last proc */
284157b952d6SSatish Balay     if (size != 1) {
284257b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2843e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
284457b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2845b24ad042SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
284657b952d6SSatish Balay     }
2847606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2848d64ed03dSBarry Smith   } else {
284957b952d6SSatish Balay     /* determine buffer space needed for message */
285057b952d6SSatish Balay     nz = 0;
285157b952d6SSatish Balay     for (i=0; i<m; i++) {
285257b952d6SSatish Balay       nz += locrowlens[i];
285357b952d6SSatish Balay     }
285419c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
285557b952d6SSatish Balay     mycols = ibuf;
285657b952d6SSatish Balay     /* receive message of column indices*/
2857b24ad042SBarry Smith     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2858b24ad042SBarry Smith     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
285929bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
286057b952d6SSatish Balay   }
286157b952d6SSatish Balay 
286257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2863dc231df0SBarry Smith   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2864dc231df0SBarry Smith   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2865dc231df0SBarry Smith   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2866dc231df0SBarry Smith   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2867dc231df0SBarry Smith   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
286857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
286957b952d6SSatish Balay   for (i=0; i<mbs; i++) {
287057b952d6SSatish Balay     dcount  = 0;
287157b952d6SSatish Balay     odcount = 0;
287257b952d6SSatish Balay     for (j=0; j<bs; j++) {
287357b952d6SSatish Balay       kmax = locrowlens[rowcount];
287457b952d6SSatish Balay       for (k=0; k<kmax; k++) {
287557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
287657b952d6SSatish Balay         if (!mask[tmp]) {
287757b952d6SSatish Balay           mask[tmp] = 1;
287857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
287957b952d6SSatish Balay           else masked1[dcount++] = tmp;
288057b952d6SSatish Balay         }
288157b952d6SSatish Balay       }
288257b952d6SSatish Balay       rowcount++;
288357b952d6SSatish Balay     }
2884cee3aa6bSSatish Balay 
288557b952d6SSatish Balay     dlens[i]  = dcount;
288657b952d6SSatish Balay     odlens[i] = odcount;
2887cee3aa6bSSatish Balay 
288857b952d6SSatish Balay     /* zero out the mask elements we set */
288957b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
289057b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
289157b952d6SSatish Balay   }
2892cee3aa6bSSatish Balay 
289357b952d6SSatish Balay   /* create our matrix */
289478ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
289578ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
289678ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
289778ae41b4SKris Buschelman 
289878ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2899dc231df0SBarry Smith   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
290057b952d6SSatish Balay 
290157b952d6SSatish Balay   if (!rank) {
290219c38ff2SBarry Smith     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
290357b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
290457b952d6SSatish Balay     nz = procsnz[0];
290557b952d6SSatish Balay     vals = buf;
2906cee3aa6bSSatish Balay     mycols = ibuf;
2907cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2908e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2909cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2910537820f0SBarry Smith 
291157b952d6SSatish Balay     /* insert into matrix */
291257b952d6SSatish Balay     jj      = rstart*bs;
291357b952d6SSatish Balay     for (i=0; i<m; i++) {
2914dc231df0SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
291557b952d6SSatish Balay       mycols += locrowlens[i];
291657b952d6SSatish Balay       vals   += locrowlens[i];
291757b952d6SSatish Balay       jj++;
291857b952d6SSatish Balay     }
291957b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
292057b952d6SSatish Balay     for (i=1; i<size-1; i++) {
292157b952d6SSatish Balay       nz   = procsnz[i];
292257b952d6SSatish Balay       vals = buf;
2923e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2924ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
292557b952d6SSatish Balay     }
292657b952d6SSatish Balay     /* the last proc */
292757b952d6SSatish Balay     if (size != 1){
292857b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2929cee3aa6bSSatish Balay       vals = buf;
2930e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
293157b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2932ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
293357b952d6SSatish Balay     }
2934606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2935d64ed03dSBarry Smith   } else {
293657b952d6SSatish Balay     /* receive numeric values */
293719c38ff2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
293857b952d6SSatish Balay 
293957b952d6SSatish Balay     /* receive message of values*/
294057b952d6SSatish Balay     vals   = buf;
2941cee3aa6bSSatish Balay     mycols = ibuf;
2942ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2943ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
294429bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
294557b952d6SSatish Balay 
294657b952d6SSatish Balay     /* insert into matrix */
294757b952d6SSatish Balay     jj      = rstart*bs;
2948cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2949dc231df0SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
295057b952d6SSatish Balay       mycols += locrowlens[i];
295157b952d6SSatish Balay       vals   += locrowlens[i];
295257b952d6SSatish Balay       jj++;
295357b952d6SSatish Balay     }
295457b952d6SSatish Balay   }
2955606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2956606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2957606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2958dc231df0SBarry Smith   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2959dc231df0SBarry Smith   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2960dc231df0SBarry Smith   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
29616d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29626d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296378ae41b4SKris Buschelman 
296478ae41b4SKris Buschelman   *newmat = A;
29653a40ed3dSBarry Smith   PetscFunctionReturn(0);
296657b952d6SSatish Balay }
296757b952d6SSatish Balay 
29684a2ae208SSatish Balay #undef __FUNCT__
29694a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2970133cdb44SSatish Balay /*@
2971133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2972133cdb44SSatish Balay 
2973133cdb44SSatish Balay    Input Parameters:
2974133cdb44SSatish Balay .  mat  - the matrix
2975133cdb44SSatish Balay .  fact - factor
2976133cdb44SSatish Balay 
2977fee21e36SBarry Smith    Collective on Mat
2978fee21e36SBarry Smith 
29798c890885SBarry Smith    Level: advanced
29808c890885SBarry Smith 
2981133cdb44SSatish Balay   Notes:
2982133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2983133cdb44SSatish Balay 
2984133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2985133cdb44SSatish Balay 
2986133cdb44SSatish Balay .seealso: MatSetOption()
2987133cdb44SSatish Balay @*/
2988*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2989133cdb44SSatish Balay {
2990dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscReal);
29915bf65638SKris Buschelman 
29925bf65638SKris Buschelman   PetscFunctionBegin;
29935bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
29945bf65638SKris Buschelman   if (f) {
29955bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
29965bf65638SKris Buschelman   }
29975bf65638SKris Buschelman   PetscFunctionReturn(0);
29985bf65638SKris Buschelman }
29995bf65638SKris Buschelman 
3000*be1d678aSKris Buschelman EXTERN_C_BEGIN
30015bf65638SKris Buschelman #undef __FUNCT__
30025bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3003*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
30045bf65638SKris Buschelman {
300525fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3006133cdb44SSatish Balay 
3007133cdb44SSatish Balay   PetscFunctionBegin;
3008133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
3009133cdb44SSatish Balay   baij->ht_fact = fact;
3010133cdb44SSatish Balay   PetscFunctionReturn(0);
3011133cdb44SSatish Balay }
3012*be1d678aSKris Buschelman EXTERN_C_END
3013f2a5309cSSatish Balay 
30144a2ae208SSatish Balay #undef __FUNCT__
30154a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3016*be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3017f2a5309cSSatish Balay {
3018f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3019f2a5309cSSatish Balay   PetscFunctionBegin;
3020f2a5309cSSatish Balay   *Ad     = a->A;
3021f2a5309cSSatish Balay   *Ao     = a->B;
3022195d93cdSBarry Smith   *colmap = a->garray;
3023f2a5309cSSatish Balay   PetscFunctionReturn(0);
3024f2a5309cSSatish Balay }
3025