xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ae15b995b5732fffd2de5a75cf61ef7190c6fef1)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
279bdfe76SSatish Balay 
3e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
479bdfe76SSatish Balay 
5dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
1693fea6afSBarry Smith 
1793fea6afSBarry Smith /*  UGLY, ugly, ugly
1887828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1993fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2093fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2193fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2293fea6afSBarry Smith    into the single precision data structures.
2393fea6afSBarry Smith */
2493fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
3093fea6afSBarry Smith #else
316fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3293fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3393fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
346fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
356fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3693fea6afSBarry Smith #endif
373b2fbd54SBarry Smith 
384a2ae208SSatish Balay #undef __FUNCT__
394a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
417843d17aSBarry Smith {
427843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43dfbe8321SBarry Smith   PetscErrorCode ierr;
44b24ad042SBarry Smith   PetscInt       i;
4587828ca2SBarry Smith   PetscScalar    *va,*vb;
467843d17aSBarry Smith   Vec            vtmp;
477843d17aSBarry Smith 
487843d17aSBarry Smith   PetscFunctionBegin;
497843d17aSBarry Smith 
507843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
511ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
527843d17aSBarry Smith 
53ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
547843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
551ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
567843d17aSBarry Smith 
57273d9f13SBarry Smith   for (i=0; i<A->m; i++){
58273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
597843d17aSBarry Smith   }
607843d17aSBarry Smith 
611ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
621ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
647843d17aSBarry Smith 
657843d17aSBarry Smith   PetscFunctionReturn(0);
667843d17aSBarry Smith }
677843d17aSBarry Smith 
687fc3c18eSBarry Smith EXTERN_C_BEGIN
694a2ae208SSatish Balay #undef __FUNCT__
704a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
727fc3c18eSBarry Smith {
737fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74dfbe8321SBarry Smith   PetscErrorCode ierr;
757fc3c18eSBarry Smith 
767fc3c18eSBarry Smith   PetscFunctionBegin;
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
787fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
797fc3c18eSBarry Smith   PetscFunctionReturn(0);
807fc3c18eSBarry Smith }
817fc3c18eSBarry Smith EXTERN_C_END
827fc3c18eSBarry Smith 
837fc3c18eSBarry Smith EXTERN_C_BEGIN
844a2ae208SSatish Balay #undef __FUNCT__
854a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
877fc3c18eSBarry Smith {
887fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89dfbe8321SBarry Smith   PetscErrorCode ierr;
907fc3c18eSBarry Smith 
917fc3c18eSBarry Smith   PetscFunctionBegin;
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
937fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
947fc3c18eSBarry Smith   PetscFunctionReturn(0);
957fc3c18eSBarry Smith }
967fc3c18eSBarry Smith EXTERN_C_END
977fc3c18eSBarry Smith 
98537820f0SBarry Smith /*
99537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
10057b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10157b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10257b952d6SSatish Balay    length of colmap equals the global matrix length.
10357b952d6SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
10757b952d6SSatish Balay {
10857b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
10957b952d6SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
1106849ba73SBarry Smith   PetscErrorCode ierr;
111521d7252SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->bs;
11257b952d6SSatish Balay 
113d64ed03dSBarry Smith   PetscFunctionBegin;
114aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
115f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11648e59246SSatish Balay   for (i=0; i<nbs; i++){
1170f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11848e59246SSatish Balay   }
11948e59246SSatish Balay #else
120b24ad042SBarry Smith   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
12152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12448e59246SSatish Balay #endif
1253a40ed3dSBarry Smith   PetscFunctionReturn(0);
12657b952d6SSatish Balay }
12757b952d6SSatish Balay 
12880c1aa95SSatish Balay #define CHUNKSIZE  10
12980c1aa95SSatish Balay 
130f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13180c1aa95SSatish Balay { \
13280c1aa95SSatish Balay  \
13380c1aa95SSatish Balay     brow = row/bs;  \
13480c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13680c1aa95SSatish Balay       bcol = col/bs; \
13780c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
138ab26458aSBarry Smith       low = 0; high = nrow; \
139ab26458aSBarry Smith       while (high-low > 3) { \
140ab26458aSBarry Smith         t = (low+high)/2; \
141ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
142ab26458aSBarry Smith         else              low  = t; \
143ab26458aSBarry Smith       } \
144ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14580c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14680c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14780c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
149eada6651SSatish Balay           else                    *bap  = value;  \
150ac7a638eSSatish Balay           goto a_noinsert; \
15180c1aa95SSatish Balay         } \
15280c1aa95SSatish Balay       } \
15389280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
154085a36d4SBarry Smith       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155ed1caa07SMatthew Knepley       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
15680c1aa95SSatish Balay       N = nrow++ - 1;  \
15780c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
15880c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
15980c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1603eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
16180c1aa95SSatish Balay       } \
1623eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
16380c1aa95SSatish Balay       rp[_i]                      = bcol;  \
16480c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
165ac7a638eSSatish Balay       a_noinsert:; \
16680c1aa95SSatish Balay     ailen[brow] = nrow; \
16780c1aa95SSatish Balay }
16857b952d6SSatish Balay 
169ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
170ac7a638eSSatish Balay { \
171ac7a638eSSatish Balay     brow = row/bs;  \
172ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
174ac7a638eSSatish Balay       bcol = col/bs; \
175ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
176ac7a638eSSatish Balay       low = 0; high = nrow; \
177ac7a638eSSatish Balay       while (high-low > 3) { \
178ac7a638eSSatish Balay         t = (low+high)/2; \
179ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
180ac7a638eSSatish Balay         else              low  = t; \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
183ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
184ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
185ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
187ac7a638eSSatish Balay           else                    *bap  = value;  \
188ac7a638eSSatish Balay           goto b_noinsert; \
189ac7a638eSSatish Balay         } \
190ac7a638eSSatish Balay       } \
19189280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
192085a36d4SBarry Smith       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
193ed1caa07SMatthew Knepley       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
194085a36d4SBarry Smith       CHKMEMQ;\
195ac7a638eSSatish Balay       N = nrow++ - 1;  \
196ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
197ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
198ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
1993eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
200ac7a638eSSatish Balay       } \
2013eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
202ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
203ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
204ac7a638eSSatish Balay       b_noinsert:; \
205ac7a638eSSatish Balay     bilen[brow] = nrow; \
206ac7a638eSSatish Balay }
207ac7a638eSSatish Balay 
20893fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2094a2ae208SSatish Balay #undef __FUNCT__
2104a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
211b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
21257b952d6SSatish Balay {
2136fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
214dfbe8321SBarry Smith   PetscErrorCode ierr;
215b24ad042SBarry Smith   PetscInt       i,N = m*n;
2166fa18ffdSBarry Smith   MatScalar      *vsingle;
21793fea6afSBarry Smith 
21893fea6afSBarry Smith   PetscFunctionBegin;
2196fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2206fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
22182502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2226fa18ffdSBarry Smith     b->setvalueslen  = N;
2236fa18ffdSBarry Smith   }
2246fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2256fa18ffdSBarry Smith 
22693fea6afSBarry Smith   for (i=0; i<N; i++) {
22793fea6afSBarry Smith     vsingle[i] = v[i];
22893fea6afSBarry Smith   }
22993fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
23093fea6afSBarry Smith   PetscFunctionReturn(0);
23193fea6afSBarry Smith }
23293fea6afSBarry Smith 
2334a2ae208SSatish Balay #undef __FUNCT__
2344a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
235b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
23693fea6afSBarry Smith {
2376fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
238dfbe8321SBarry Smith   PetscErrorCode ierr;
239b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
2406fa18ffdSBarry Smith   MatScalar      *vsingle;
24193fea6afSBarry Smith 
24293fea6afSBarry Smith   PetscFunctionBegin;
2436fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2446fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
24582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2466fa18ffdSBarry Smith     b->setvalueslen  = N;
2476fa18ffdSBarry Smith   }
2486fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
24993fea6afSBarry Smith   for (i=0; i<N; i++) {
25093fea6afSBarry Smith     vsingle[i] = v[i];
25193fea6afSBarry Smith   }
25293fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
25393fea6afSBarry Smith   PetscFunctionReturn(0);
25493fea6afSBarry Smith }
2556fa18ffdSBarry Smith 
2564a2ae208SSatish Balay #undef __FUNCT__
2574a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
258b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
2596fa18ffdSBarry Smith {
2606fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261dfbe8321SBarry Smith   PetscErrorCode ierr;
262b24ad042SBarry Smith   PetscInt       i,N = m*n;
2636fa18ffdSBarry Smith   MatScalar      *vsingle;
2646fa18ffdSBarry Smith 
2656fa18ffdSBarry Smith   PetscFunctionBegin;
2666fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2676fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
26882502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2696fa18ffdSBarry Smith     b->setvalueslen  = N;
2706fa18ffdSBarry Smith   }
2716fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2726fa18ffdSBarry Smith   for (i=0; i<N; i++) {
2736fa18ffdSBarry Smith     vsingle[i] = v[i];
2746fa18ffdSBarry Smith   }
2756fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
2766fa18ffdSBarry Smith   PetscFunctionReturn(0);
2776fa18ffdSBarry Smith }
2786fa18ffdSBarry Smith 
2794a2ae208SSatish Balay #undef __FUNCT__
2804a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
281b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
2826fa18ffdSBarry Smith {
2836fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284dfbe8321SBarry Smith   PetscErrorCode ierr;
285b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
2866fa18ffdSBarry Smith   MatScalar      *vsingle;
2876fa18ffdSBarry Smith 
2886fa18ffdSBarry Smith   PetscFunctionBegin;
2896fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2906fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29182502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2926fa18ffdSBarry Smith     b->setvalueslen  = N;
2936fa18ffdSBarry Smith   }
2946fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2956fa18ffdSBarry Smith   for (i=0; i<N; i++) {
2966fa18ffdSBarry Smith     vsingle[i] = v[i];
2976fa18ffdSBarry Smith   }
2986fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
2996fa18ffdSBarry Smith   PetscFunctionReturn(0);
3006fa18ffdSBarry Smith }
30193fea6afSBarry Smith #endif
30293fea6afSBarry Smith 
3034a2ae208SSatish Balay #undef __FUNCT__
304e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
305b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
30693fea6afSBarry Smith {
30757b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
30893fea6afSBarry Smith   MatScalar      value;
309273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
310dfbe8321SBarry Smith   PetscErrorCode ierr;
311b24ad042SBarry Smith   PetscInt       i,j,row,col;
312b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
313b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
314521d7252SBarry Smith   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
31557b952d6SSatish Balay 
316eada6651SSatish Balay   /* Some Variables required in the macro */
31780c1aa95SSatish Balay   Mat            A = baij->A;
31880c1aa95SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
319b24ad042SBarry Smith   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3203eda8832SBarry Smith   MatScalar      *aa=a->a;
321ac7a638eSSatish Balay 
322ac7a638eSSatish Balay   Mat            B = baij->B;
323ac7a638eSSatish Balay   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
324b24ad042SBarry Smith   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3253eda8832SBarry Smith   MatScalar      *ba=b->a;
326ac7a638eSSatish Balay 
327b24ad042SBarry Smith   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
328b24ad042SBarry Smith   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
3293eda8832SBarry Smith   MatScalar      *ap,*bap;
33080c1aa95SSatish Balay 
331d64ed03dSBarry Smith   PetscFunctionBegin;
33257b952d6SSatish Balay   for (i=0; i<m; i++) {
3335ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3342515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
33577431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
336639f9d9dSBarry Smith #endif
33757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
33857b952d6SSatish Balay       row = im[i] - rstart_orig;
33957b952d6SSatish Balay       for (j=0; j<n; j++) {
34057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
34157b952d6SSatish Balay           col = in[j] - cstart_orig;
34257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
343f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
34480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
34573959e64SBarry Smith         } else if (in[j] < 0) continue;
3462515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
34777431f27SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
348639f9d9dSBarry Smith #endif
34957b952d6SSatish Balay         else {
35057b952d6SSatish Balay           if (mat->was_assembled) {
351905e6a2fSBarry Smith             if (!baij->colmap) {
352905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353905e6a2fSBarry Smith             }
354aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3550f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
356bba1ac68SSatish Balay             col  = col - 1;
35748e59246SSatish Balay #else
358bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
35948e59246SSatish Balay #endif
36057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
36157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3628295de27SSatish Balay               col =  in[j];
3639bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
3649bf004c3SSatish Balay               B = baij->B;
3659bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
3669bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
3679bf004c3SSatish Balay               ba=b->a;
368bba1ac68SSatish Balay             } else col += in[j]%bs;
3698295de27SSatish Balay           } else col = in[j];
37057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
37190da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
37290da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
37357b952d6SSatish Balay         }
37457b952d6SSatish Balay       }
375d64ed03dSBarry Smith     } else {
37690f02eecSBarry Smith       if (!baij->donotstash) {
377ff2fd236SBarry Smith         if (roworiented) {
3786fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
379ff2fd236SBarry Smith         } else {
3806fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
38157b952d6SSatish Balay         }
38257b952d6SSatish Balay       }
38357b952d6SSatish Balay     }
38490f02eecSBarry Smith   }
3853a40ed3dSBarry Smith   PetscFunctionReturn(0);
38657b952d6SSatish Balay }
38757b952d6SSatish Balay 
3884a2ae208SSatish Balay #undef __FUNCT__
3894a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
390b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
391ab26458aSBarry Smith {
392ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
393f15d580aSBarry Smith   const MatScalar *value;
394f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
395273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
396dfbe8321SBarry Smith   PetscErrorCode  ierr;
397b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
398b24ad042SBarry Smith   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
399521d7252SBarry Smith   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
400ab26458aSBarry Smith 
401b16ae2b1SBarry Smith   PetscFunctionBegin;
40230793edcSSatish Balay   if(!barray) {
40382502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
40482502324SSatish Balay     baij->barray = barray;
40530793edcSSatish Balay   }
40630793edcSSatish Balay 
407ab26458aSBarry Smith   if (roworiented) {
408ab26458aSBarry Smith     stepval = (n-1)*bs;
409ab26458aSBarry Smith   } else {
410ab26458aSBarry Smith     stepval = (m-1)*bs;
411ab26458aSBarry Smith   }
412ab26458aSBarry Smith   for (i=0; i<m; i++) {
4135ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4142515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
41577431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
416ab26458aSBarry Smith #endif
417ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
418ab26458aSBarry Smith       row = im[i] - rstart;
419ab26458aSBarry Smith       for (j=0; j<n; j++) {
42015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
42115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
422f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
42315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
424f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
42515b57d14SSatish Balay         } else { /* Here a copy is required */
426ab26458aSBarry Smith           if (roworiented) {
427ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
428ab26458aSBarry Smith           } else {
429ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
430abef11f7SSatish Balay           }
43147513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
43247513183SBarry Smith             for (jj=0; jj<bs; jj++) {
43330793edcSSatish Balay               *barray++  = *value++;
43447513183SBarry Smith             }
43547513183SBarry Smith           }
43630793edcSSatish Balay           barray -=bs2;
43715b57d14SSatish Balay         }
438abef11f7SSatish Balay 
439abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
440abef11f7SSatish Balay           col  = in[j] - cstart;
44193fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
442ab26458aSBarry Smith         }
4435ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
4442515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
44577431f27SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
446ab26458aSBarry Smith #endif
447ab26458aSBarry Smith         else {
448ab26458aSBarry Smith           if (mat->was_assembled) {
449ab26458aSBarry Smith             if (!baij->colmap) {
450ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451ab26458aSBarry Smith             }
452a5eb4965SSatish Balay 
4532515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
454aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
455b24ad042SBarry Smith             { PetscInt data;
4560f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
45729bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
458fa46199cSSatish Balay             }
45948e59246SSatish Balay #else
46029bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461a5eb4965SSatish Balay #endif
46248e59246SSatish Balay #endif
463aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4640f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
465fa46199cSSatish Balay             col  = (col - 1)/bs;
46648e59246SSatish Balay #else
467a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
46848e59246SSatish Balay #endif
469ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
470ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
471ab26458aSBarry Smith               col =  in[j];
472ab26458aSBarry Smith             }
473ab26458aSBarry Smith           }
474ab26458aSBarry Smith           else col = in[j];
47593fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
476ab26458aSBarry Smith         }
477ab26458aSBarry Smith       }
478d64ed03dSBarry Smith     } else {
479ab26458aSBarry Smith       if (!baij->donotstash) {
480ff2fd236SBarry Smith         if (roworiented) {
4816fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
482ff2fd236SBarry Smith         } else {
4836fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484ff2fd236SBarry Smith         }
485abef11f7SSatish Balay       }
486ab26458aSBarry Smith     }
487ab26458aSBarry Smith   }
4883a40ed3dSBarry Smith   PetscFunctionReturn(0);
489ab26458aSBarry Smith }
4906fa18ffdSBarry Smith 
4910bdbc534SSatish Balay #define HASH_KEY 0.6180339887
492b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
493b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
494b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
4954a2ae208SSatish Balay #undef __FUNCT__
4964a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
497b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
4980bdbc534SSatish Balay {
4990bdbc534SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
500273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
501dfbe8321SBarry Smith   PetscErrorCode ierr;
502b24ad042SBarry Smith   PetscInt       i,j,row,col;
503b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
504b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
505521d7252SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
506329f5518SBarry Smith   PetscReal      tmp;
5073eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
5082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
509b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5104a15367fSSatish Balay #endif
5110bdbc534SSatish Balay 
5120bdbc534SSatish Balay   PetscFunctionBegin;
5130bdbc534SSatish Balay 
5140bdbc534SSatish Balay   for (i=0; i<m; i++) {
5152515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
51629bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
51777431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
5180bdbc534SSatish Balay #endif
5190bdbc534SSatish Balay       row = im[i];
520c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5210bdbc534SSatish Balay       for (j=0; j<n; j++) {
5220bdbc534SSatish Balay         col = in[j];
5236fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
524b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
525c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
526c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5270bdbc534SSatish Balay 
528c2760754SSatish Balay 
529c2760754SSatish Balay         idx = h1;
5302515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
531187ce0cbSSatish Balay         insert_ct++;
532187ce0cbSSatish Balay         total_ct++;
533187ce0cbSSatish Balay         if (HT[idx] != key) {
534187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
535187ce0cbSSatish Balay           if (idx == size) {
536187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
537187ce0cbSSatish Balay             if (idx == h1) {
53877431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
539187ce0cbSSatish Balay             }
540187ce0cbSSatish Balay           }
541187ce0cbSSatish Balay         }
542187ce0cbSSatish Balay #else
543c2760754SSatish Balay         if (HT[idx] != key) {
544c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
545c2760754SSatish Balay           if (idx == size) {
546c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
547c2760754SSatish Balay             if (idx == h1) {
54877431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
549c2760754SSatish Balay             }
550c2760754SSatish Balay           }
551c2760754SSatish Balay         }
552187ce0cbSSatish Balay #endif
553c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
554c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
555c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5560bdbc534SSatish Balay       }
5570bdbc534SSatish Balay     } else {
5580bdbc534SSatish Balay       if (!baij->donotstash) {
559ff2fd236SBarry Smith         if (roworiented) {
5608798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
561ff2fd236SBarry Smith         } else {
5628798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5630bdbc534SSatish Balay         }
5640bdbc534SSatish Balay       }
5650bdbc534SSatish Balay     }
5660bdbc534SSatish Balay   }
5672515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
568187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
569187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
570187ce0cbSSatish Balay #endif
5710bdbc534SSatish Balay   PetscFunctionReturn(0);
5720bdbc534SSatish Balay }
5730bdbc534SSatish Balay 
5744a2ae208SSatish Balay #undef __FUNCT__
5754a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
576b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
5770bdbc534SSatish Balay {
5780bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
579273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
580dfbe8321SBarry Smith   PetscErrorCode  ierr;
581b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col;
582b24ad042SBarry Smith   PetscInt        rstart=baij->rstart ;
583521d7252SBarry Smith   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
584b24ad042SBarry Smith   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
585329f5518SBarry Smith   PetscReal       tmp;
5863eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
587f15d580aSBarry Smith   const MatScalar *v_t,*value;
5882515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
589b24ad042SBarry Smith   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5904a15367fSSatish Balay #endif
5910bdbc534SSatish Balay 
592d0a41580SSatish Balay   PetscFunctionBegin;
593d0a41580SSatish Balay 
5940bdbc534SSatish Balay   if (roworiented) {
5950bdbc534SSatish Balay     stepval = (n-1)*bs;
5960bdbc534SSatish Balay   } else {
5970bdbc534SSatish Balay     stepval = (m-1)*bs;
5980bdbc534SSatish Balay   }
5990bdbc534SSatish Balay   for (i=0; i<m; i++) {
6002515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
60177431f27SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
60277431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
6030bdbc534SSatish Balay #endif
6040bdbc534SSatish Balay     row   = im[i];
605187ce0cbSSatish Balay     v_t   = v + i*bs2;
606c2760754SSatish Balay     if (row >= rstart && row < rend) {
6070bdbc534SSatish Balay       for (j=0; j<n; j++) {
6080bdbc534SSatish Balay         col = in[j];
6090bdbc534SSatish Balay 
6100bdbc534SSatish Balay         /* Look up into the Hash Table */
611c2760754SSatish Balay         key = row*Nbs+col+1;
612c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6130bdbc534SSatish Balay 
614c2760754SSatish Balay         idx = h1;
6152515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
616187ce0cbSSatish Balay         total_ct++;
617187ce0cbSSatish Balay         insert_ct++;
618187ce0cbSSatish Balay        if (HT[idx] != key) {
619187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
620187ce0cbSSatish Balay           if (idx == size) {
621187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
622187ce0cbSSatish Balay             if (idx == h1) {
62377431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624187ce0cbSSatish Balay             }
625187ce0cbSSatish Balay           }
626187ce0cbSSatish Balay         }
627187ce0cbSSatish Balay #else
628c2760754SSatish Balay         if (HT[idx] != key) {
629c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
630c2760754SSatish Balay           if (idx == size) {
631c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
632c2760754SSatish Balay             if (idx == h1) {
63377431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
634c2760754SSatish Balay             }
635c2760754SSatish Balay           }
636c2760754SSatish Balay         }
637187ce0cbSSatish Balay #endif
638c2760754SSatish Balay         baij_a = HD[idx];
6390bdbc534SSatish Balay         if (roworiented) {
640c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
641187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
642187ce0cbSSatish Balay           value = v_t;
643187ce0cbSSatish Balay           v_t  += bs;
644fef45726SSatish Balay           if (addv == ADD_VALUES) {
645c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
646c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
647fef45726SSatish Balay                 baij_a[jj]  += *value++;
648b4cc0f5aSSatish Balay               }
649b4cc0f5aSSatish Balay             }
650fef45726SSatish Balay           } else {
651c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
652c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
653fef45726SSatish Balay                 baij_a[jj]  = *value++;
654fef45726SSatish Balay               }
655fef45726SSatish Balay             }
656fef45726SSatish Balay           }
6570bdbc534SSatish Balay         } else {
6580bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
659fef45726SSatish Balay           if (addv == ADD_VALUES) {
660b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
6610bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
662fef45726SSatish Balay                 baij_a[jj]  += *value++;
663fef45726SSatish Balay               }
664fef45726SSatish Balay             }
665fef45726SSatish Balay           } else {
666fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
667fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
668fef45726SSatish Balay                 baij_a[jj]  = *value++;
669fef45726SSatish Balay               }
670b4cc0f5aSSatish Balay             }
6710bdbc534SSatish Balay           }
6720bdbc534SSatish Balay         }
6730bdbc534SSatish Balay       }
6740bdbc534SSatish Balay     } else {
6750bdbc534SSatish Balay       if (!baij->donotstash) {
6760bdbc534SSatish Balay         if (roworiented) {
6778798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6780bdbc534SSatish Balay         } else {
6798798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
6800bdbc534SSatish Balay         }
6810bdbc534SSatish Balay       }
6820bdbc534SSatish Balay     }
6830bdbc534SSatish Balay   }
6842515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
685187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
686187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
687187ce0cbSSatish Balay #endif
6880bdbc534SSatish Balay   PetscFunctionReturn(0);
6890bdbc534SSatish Balay }
690133cdb44SSatish Balay 
6914a2ae208SSatish Balay #undef __FUNCT__
6924a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
693b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
694d6de1c52SSatish Balay {
695d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
6966849ba73SBarry Smith   PetscErrorCode ierr;
697521d7252SBarry Smith   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
698b24ad042SBarry Smith   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
699d6de1c52SSatish Balay 
700133cdb44SSatish Balay   PetscFunctionBegin;
701d6de1c52SSatish Balay   for (i=0; i<m; i++) {
70277431f27SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
70377431f27SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
704d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
705d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
706d6de1c52SSatish Balay       for (j=0; j<n; j++) {
70777431f27SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
70877431f27SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
709d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
710d6de1c52SSatish Balay           col = idxn[j] - bscstart;
71198dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
712d64ed03dSBarry Smith         } else {
713905e6a2fSBarry Smith           if (!baij->colmap) {
714905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
715905e6a2fSBarry Smith           }
716aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7170f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
718fa46199cSSatish Balay           data --;
71948e59246SSatish Balay #else
72048e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
72148e59246SSatish Balay #endif
72248e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
723d9d09a02SSatish Balay           else {
72448e59246SSatish Balay             col  = data + idxn[j]%bs;
72598dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
726d6de1c52SSatish Balay           }
727d6de1c52SSatish Balay         }
728d6de1c52SSatish Balay       }
729d64ed03dSBarry Smith     } else {
73029bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
731d6de1c52SSatish Balay     }
732d6de1c52SSatish Balay   }
7333a40ed3dSBarry Smith  PetscFunctionReturn(0);
734d6de1c52SSatish Balay }
735d6de1c52SSatish Balay 
7364a2ae208SSatish Balay #undef __FUNCT__
7374a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
738dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
739d6de1c52SSatish Balay {
740d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
741d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742dfbe8321SBarry Smith   PetscErrorCode ierr;
7438a62d963SHong Zhang   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
744329f5518SBarry Smith   PetscReal      sum = 0.0;
7453eda8832SBarry Smith   MatScalar      *v;
746d6de1c52SSatish Balay 
747d64ed03dSBarry Smith   PetscFunctionBegin;
748d6de1c52SSatish Balay   if (baij->size == 1) {
749064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
750d6de1c52SSatish Balay   } else {
751d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
752d6de1c52SSatish Balay       v = amat->a;
7538a62d963SHong Zhang       nz = amat->nz*bs2;
7548a62d963SHong Zhang       for (i=0; i<nz; i++) {
755aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
756329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
757d6de1c52SSatish Balay #else
758d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
759d6de1c52SSatish Balay #endif
760d6de1c52SSatish Balay       }
761d6de1c52SSatish Balay       v = bmat->a;
7628a62d963SHong Zhang       nz = bmat->nz*bs2;
7638a62d963SHong Zhang       for (i=0; i<nz; i++) {
764aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
765329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
766d6de1c52SSatish Balay #else
767d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
768d6de1c52SSatish Balay #endif
769d6de1c52SSatish Balay       }
770064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
771064f8208SBarry Smith       *nrm = sqrt(*nrm);
7728a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
7738a62d963SHong Zhang       PetscReal *tmp,*tmp2;
7748a62d963SHong Zhang       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
7758a62d963SHong Zhang       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
7768a62d963SHong Zhang       tmp2 = tmp + mat->N;
7778a62d963SHong Zhang       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
7788a62d963SHong Zhang       v = amat->a; jj = amat->j;
7798a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
7808a62d963SHong Zhang         for (j=0; j<bs; j++){
7818a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
7828a62d963SHong Zhang           for (row=0; row<bs; row++){
7838a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
7848a62d963SHong Zhang           }
7858a62d963SHong Zhang         }
7868a62d963SHong Zhang         jj++;
7878a62d963SHong Zhang       }
7888a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
7898a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
7908a62d963SHong Zhang         for (j=0; j<bs; j++){
7918a62d963SHong Zhang           col = bs*garray[*jj] + j;
7928a62d963SHong Zhang           for (row=0; row<bs; row++){
7938a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
7948a62d963SHong Zhang           }
7958a62d963SHong Zhang         }
7968a62d963SHong Zhang         jj++;
7978a62d963SHong Zhang       }
7988a62d963SHong Zhang       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
7998a62d963SHong Zhang       *nrm = 0.0;
8008a62d963SHong Zhang       for (j=0; j<mat->N; j++) {
8018a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8028a62d963SHong Zhang       }
8038a62d963SHong Zhang       ierr = PetscFree(tmp);CHKERRQ(ierr);
8048a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
805577dd1f9SKris Buschelman       PetscReal *sums;
806577dd1f9SKris Buschelman       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
8078a62d963SHong Zhang       sum = 0.0;
8088a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
8098a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
8108a62d963SHong Zhang         v = amat->a + bs2*amat->i[j];
8118a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
8128a62d963SHong Zhang         for (i=0; i<nz; i++) {
8138a62d963SHong Zhang           for (col=0; col<bs; col++){
8148a62d963SHong Zhang             for (row=0; row<bs; row++){
8158a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8168a62d963SHong Zhang             }
8178a62d963SHong Zhang           }
8188a62d963SHong Zhang         }
8198a62d963SHong Zhang         v = bmat->a + bs2*bmat->i[j];
8208a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
8218a62d963SHong Zhang         for (i=0; i<nz; i++) {
8228a62d963SHong Zhang           for (col=0; col<bs; col++){
8238a62d963SHong Zhang             for (row=0; row<bs; row++){
8248a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8258a62d963SHong Zhang             }
8268a62d963SHong Zhang           }
8278a62d963SHong Zhang         }
8288a62d963SHong Zhang         for (row=0; row<bs; row++){
8298a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
8308a62d963SHong Zhang         }
8318a62d963SHong Zhang       }
8328a62d963SHong Zhang       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
833577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
834d64ed03dSBarry Smith     } else {
83529bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836d6de1c52SSatish Balay     }
837d64ed03dSBarry Smith   }
8383a40ed3dSBarry Smith   PetscFunctionReturn(0);
839d6de1c52SSatish Balay }
84057b952d6SSatish Balay 
841fef45726SSatish Balay /*
842fef45726SSatish Balay   Creates the hash table, and sets the table
843fef45726SSatish Balay   This table is created only once.
844fef45726SSatish Balay   If new entried need to be added to the matrix
845fef45726SSatish Balay   then the hash table has to be destroyed and
846fef45726SSatish Balay   recreated.
847fef45726SSatish Balay */
8484a2ae208SSatish Balay #undef __FUNCT__
8494a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851596b8d2eSBarry Smith {
852596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
853596b8d2eSBarry Smith   Mat            A = baij->A,B=baij->B;
854596b8d2eSBarry Smith   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
8566849ba73SBarry Smith   PetscErrorCode ierr;
857b24ad042SBarry Smith   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
858b24ad042SBarry Smith   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859b24ad042SBarry Smith   PetscInt       *HT,key;
8603eda8832SBarry Smith   MatScalar      **HD;
861329f5518SBarry Smith   PetscReal      tmp;
8626cf91177SBarry Smith #if defined(PETSC_USE_INFO)
863b24ad042SBarry Smith   PetscInt       ct=0,max=0;
8644a15367fSSatish Balay #endif
865fef45726SSatish Balay 
866d64ed03dSBarry Smith   PetscFunctionBegin;
867b24ad042SBarry Smith   baij->ht_size=(PetscInt)(factor*nz);
8680bdbc534SSatish Balay   size = baij->ht_size;
869fef45726SSatish Balay 
8700bdbc534SSatish Balay   if (baij->ht) {
8710bdbc534SSatish Balay     PetscFunctionReturn(0);
872596b8d2eSBarry Smith   }
8730bdbc534SSatish Balay 
874fef45726SSatish Balay   /* Allocate Memory for Hash Table */
875b24ad042SBarry Smith   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876b24ad042SBarry Smith   baij->ht = (PetscInt*)(baij->hd + size);
877b9e4cc15SSatish Balay   HD       = baij->hd;
878a07cd24cSSatish Balay   HT       = baij->ht;
879b9e4cc15SSatish Balay 
880b9e4cc15SSatish Balay 
881b24ad042SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
8820bdbc534SSatish Balay 
883596b8d2eSBarry Smith 
884596b8d2eSBarry Smith   /* Loop Over A */
8850bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
886596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8870bdbc534SSatish Balay       row = i+rstart;
8880bdbc534SSatish Balay       col = aj[j]+cstart;
889596b8d2eSBarry Smith 
890187ce0cbSSatish Balay       key = row*Nbs + col + 1;
891c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8920bdbc534SSatish Balay       for (k=0; k<size; k++){
893958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
8940bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8950bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
896596b8d2eSBarry Smith           break;
8976cf91177SBarry Smith #if defined(PETSC_USE_INFO)
898187ce0cbSSatish Balay         } else {
899187ce0cbSSatish Balay           ct++;
900187ce0cbSSatish Balay #endif
901596b8d2eSBarry Smith         }
902187ce0cbSSatish Balay       }
9036cf91177SBarry Smith #if defined(PETSC_USE_INFO)
904187ce0cbSSatish Balay       if (k> max) max = k;
905187ce0cbSSatish Balay #endif
906596b8d2eSBarry Smith     }
907596b8d2eSBarry Smith   }
908596b8d2eSBarry Smith   /* Loop Over B */
9090bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
910596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9110bdbc534SSatish Balay       row = i+rstart;
9120bdbc534SSatish Balay       col = garray[bj[j]];
913187ce0cbSSatish Balay       key = row*Nbs + col + 1;
914c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9150bdbc534SSatish Balay       for (k=0; k<size; k++){
916958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9170bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9180bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
919596b8d2eSBarry Smith           break;
9206cf91177SBarry Smith #if defined(PETSC_USE_INFO)
921187ce0cbSSatish Balay         } else {
922187ce0cbSSatish Balay           ct++;
923187ce0cbSSatish Balay #endif
924596b8d2eSBarry Smith         }
925187ce0cbSSatish Balay       }
9266cf91177SBarry Smith #if defined(PETSC_USE_INFO)
927187ce0cbSSatish Balay       if (k> max) max = k;
928187ce0cbSSatish Balay #endif
929596b8d2eSBarry Smith     }
930596b8d2eSBarry Smith   }
931596b8d2eSBarry Smith 
932596b8d2eSBarry Smith   /* Print Summary */
9336cf91177SBarry Smith #if defined(PETSC_USE_INFO)
934c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
935596b8d2eSBarry Smith     if (HT[i]) {j++;}
936c38d4ed2SBarry Smith   }
937*ae15b995SBarry Smith   ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
938187ce0cbSSatish Balay #endif
9393a40ed3dSBarry Smith   PetscFunctionReturn(0);
940596b8d2eSBarry Smith }
94157b952d6SSatish Balay 
9424a2ae208SSatish Balay #undef __FUNCT__
9434a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945bbb85fb3SSatish Balay {
946bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
947dfbe8321SBarry Smith   PetscErrorCode ierr;
948b24ad042SBarry Smith   PetscInt       nstash,reallocs;
949bbb85fb3SSatish Balay   InsertMode     addv;
950bbb85fb3SSatish Balay 
951bbb85fb3SSatish Balay   PetscFunctionBegin;
952bbb85fb3SSatish Balay   if (baij->donotstash) {
953bbb85fb3SSatish Balay     PetscFunctionReturn(0);
954bbb85fb3SSatish Balay   }
955bbb85fb3SSatish Balay 
956bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
957bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
958bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
95929bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
960bbb85fb3SSatish Balay   }
961bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
962bbb85fb3SSatish Balay 
9638798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9648798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9658798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
966*ae15b995SBarry Smith   ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
96746680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
968*ae15b995SBarry Smith   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
969bbb85fb3SSatish Balay   PetscFunctionReturn(0);
970bbb85fb3SSatish Balay }
971bbb85fb3SSatish Balay 
9724a2ae208SSatish Balay #undef __FUNCT__
9734a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
974dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
975bbb85fb3SSatish Balay {
976bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
977a2d1c673SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
9786849ba73SBarry Smith   PetscErrorCode ierr;
979b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
980b24ad042SBarry Smith   PetscInt       *row,*col,other_disassembled;
9817c922b88SBarry Smith   PetscTruth     r1,r2,r3;
9823eda8832SBarry Smith   MatScalar      *val;
983bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
984b24ad042SBarry Smith   PetscMPIInt    n;
985bbb85fb3SSatish Balay 
986bbb85fb3SSatish Balay   PetscFunctionBegin;
987bbb85fb3SSatish Balay   if (!baij->donotstash) {
988a2d1c673SSatish Balay     while (1) {
9898798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
990a2d1c673SSatish Balay       if (!flg) break;
991a2d1c673SSatish Balay 
992bbb85fb3SSatish Balay       for (i=0; i<n;) {
993bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
996bbb85fb3SSatish Balay         else       ncols = n-i;
997bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
99893fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999bbb85fb3SSatish Balay         i = j;
1000bbb85fb3SSatish Balay       }
1001bbb85fb3SSatish Balay     }
10028798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005a2d1c673SSatish Balay        restore the original flags */
1006a2d1c673SSatish Balay     r1 = baij->roworiented;
1007a2d1c673SSatish Balay     r2 = a->roworiented;
1008a2d1c673SSatish Balay     r3 = b->roworiented;
10097c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10107c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10117c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1012a2d1c673SSatish Balay     while (1) {
10138798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014a2d1c673SSatish Balay       if (!flg) break;
1015a2d1c673SSatish Balay 
1016a2d1c673SSatish Balay       for (i=0; i<n;) {
1017a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1020a2d1c673SSatish Balay         else       ncols = n-i;
102193fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022a2d1c673SSatish Balay         i = j;
1023a2d1c673SSatish Balay       }
1024a2d1c673SSatish Balay     }
10258798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026a2d1c673SSatish Balay     baij->roworiented = r1;
1027a2d1c673SSatish Balay     a->roworiented    = r2;
1028a2d1c673SSatish Balay     b->roworiented    = r3;
1029bbb85fb3SSatish Balay   }
1030bbb85fb3SSatish Balay 
1031bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033bbb85fb3SSatish Balay 
1034bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1035bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036bbb85fb3SSatish Balay   /*
1037bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1039bbb85fb3SSatish Balay   */
1040bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044bbb85fb3SSatish Balay     }
1045bbb85fb3SSatish Balay   }
1046bbb85fb3SSatish Balay 
1047bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1049bbb85fb3SSatish Balay   }
105073e7a558SHong Zhang   b->compressedrow.use = PETSC_TRUE;
1051bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1052bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1053bbb85fb3SSatish Balay 
10546cf91177SBarry Smith #if defined(PETSC_USE_INFO)
1055bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1056*ae15b995SBarry Smith     ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1057bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1058bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1059bbb85fb3SSatish Balay   }
1060bbb85fb3SSatish Balay #endif
1061bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1062bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1063bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1064bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1065bbb85fb3SSatish Balay   }
1066bbb85fb3SSatish Balay 
1067606d414cSSatish Balay   if (baij->rowvalues) {
1068606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1069606d414cSSatish Balay     baij->rowvalues = 0;
1070606d414cSSatish Balay   }
1071bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1072bbb85fb3SSatish Balay }
107357b952d6SSatish Balay 
10744a2ae208SSatish Balay #undef __FUNCT__
10754a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
10766849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
107757b952d6SSatish Balay {
107857b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1079dfbe8321SBarry Smith   PetscErrorCode    ierr;
1080b24ad042SBarry Smith   PetscMPIInt       size = baij->size,rank = baij->rank;
1081521d7252SBarry Smith   PetscInt          bs = mat->bs;
108232077d6dSBarry Smith   PetscTruth        iascii,isdraw;
1083b0a32e0cSBarry Smith   PetscViewer       sviewer;
1084f3ef73ceSBarry Smith   PetscViewerFormat format;
108557b952d6SSatish Balay 
1086d64ed03dSBarry Smith   PetscFunctionBegin;
1087f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
108832077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1089fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
109032077d6dSBarry Smith   if (iascii) {
1091b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1092456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
10934e220ebcSLois Curfman McInnes       MatInfo info;
1094d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1095d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
109677431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
109777431f27SBarry Smith               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1098521d7252SBarry Smith               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1099d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
110077431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1101d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
110277431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1103b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
110457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
11053a40ed3dSBarry Smith       PetscFunctionReturn(0);
1106fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
110777431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
11083a40ed3dSBarry Smith       PetscFunctionReturn(0);
110904929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
111004929863SHong Zhang       PetscFunctionReturn(0);
111157b952d6SSatish Balay     }
111257b952d6SSatish Balay   }
111357b952d6SSatish Balay 
11140f5bd95cSBarry Smith   if (isdraw) {
1115b0a32e0cSBarry Smith     PetscDraw       draw;
111657b952d6SSatish Balay     PetscTruth isnull;
1117b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1118b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
111957b952d6SSatish Balay   }
112057b952d6SSatish Balay 
112157b952d6SSatish Balay   if (size == 1) {
1122873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
112357b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1124d64ed03dSBarry Smith   } else {
112557b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
112657b952d6SSatish Balay     Mat         A;
112757b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1128b24ad042SBarry Smith     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
11293eda8832SBarry Smith     MatScalar   *a;
113057b952d6SSatish Balay 
1131f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1132f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
1133f69a0ea3SMatthew Knepley     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
113457b952d6SSatish Balay     if (!rank) {
1135f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1136d64ed03dSBarry Smith     } else {
1137f69a0ea3SMatthew Knepley       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
113857b952d6SSatish Balay     }
1139f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1140521d7252SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
114152e6d16bSBarry Smith     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
114257b952d6SSatish Balay 
114357b952d6SSatish Balay     /* copy over the A part */
114457b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
114557b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1146b24ad042SBarry Smith     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
114757b952d6SSatish Balay 
114857b952d6SSatish Balay     for (i=0; i<mbs; i++) {
114957b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
115057b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
115157b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
115257b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
115357b952d6SSatish Balay         for (k=0; k<bs; k++) {
115493fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155cee3aa6bSSatish Balay           col++; a += bs;
115657b952d6SSatish Balay         }
115757b952d6SSatish Balay       }
115857b952d6SSatish Balay     }
115957b952d6SSatish Balay     /* copy over the B part */
116057b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
116157b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
116257b952d6SSatish Balay     for (i=0; i<mbs; i++) {
116357b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
116457b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
116557b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
116657b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
116757b952d6SSatish Balay         for (k=0; k<bs; k++) {
116893fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1169cee3aa6bSSatish Balay           col++; a += bs;
117057b952d6SSatish Balay         }
117157b952d6SSatish Balay       }
117257b952d6SSatish Balay     }
1173606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11746d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11756d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117655843e3eSBarry Smith     /*
117755843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1178b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
117955843e3eSBarry Smith     */
1180b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181f14a1c24SBarry Smith     if (!rank) {
1182e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
11836831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
118457b952d6SSatish Balay     }
1185b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
118657b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
118757b952d6SSatish Balay   }
11883a40ed3dSBarry Smith   PetscFunctionReturn(0);
118957b952d6SSatish Balay }
119057b952d6SSatish Balay 
11914a2ae208SSatish Balay #undef __FUNCT__
11924a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1193dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
119457b952d6SSatish Balay {
1195dfbe8321SBarry Smith   PetscErrorCode ierr;
119632077d6dSBarry Smith   PetscTruth     iascii,isdraw,issocket,isbinary;
119757b952d6SSatish Balay 
1198d64ed03dSBarry Smith   PetscFunctionBegin;
119932077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1200fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1202fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
120332077d6dSBarry Smith   if (iascii || isdraw || issocket || isbinary) {
12047b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
12055cd90555SBarry Smith   } else {
120679a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
120757b952d6SSatish Balay   }
12083a40ed3dSBarry Smith   PetscFunctionReturn(0);
120957b952d6SSatish Balay }
121057b952d6SSatish Balay 
12114a2ae208SSatish Balay #undef __FUNCT__
12124a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1213dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
121479bdfe76SSatish Balay {
121579bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1216dfbe8321SBarry Smith   PetscErrorCode ierr;
121779bdfe76SSatish Balay 
1218d64ed03dSBarry Smith   PetscFunctionBegin;
1219aa482453SBarry Smith #if defined(PETSC_USE_LOG)
122077431f27SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
122179bdfe76SSatish Balay #endif
12228798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12238798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
122579bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
122679bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1227aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
12280f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
122948e59246SSatish Balay #else
1230606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
123148e59246SSatish Balay #endif
1232606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1233606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1234606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1235606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1236606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1237606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
12386fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
12396fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
12406fa18ffdSBarry Smith #endif
1241606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1242901853e0SKris Buschelman 
1243901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1244901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1245901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1246901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1247aac34f13SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1248901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1249901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
12503a40ed3dSBarry Smith   PetscFunctionReturn(0);
125179bdfe76SSatish Balay }
125279bdfe76SSatish Balay 
12534a2ae208SSatish Balay #undef __FUNCT__
12544a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1255dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1256cee3aa6bSSatish Balay {
1257cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1258dfbe8321SBarry Smith   PetscErrorCode ierr;
1259b24ad042SBarry Smith   PetscInt       nt;
1260cee3aa6bSSatish Balay 
1261d64ed03dSBarry Smith   PetscFunctionBegin;
1262e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1263273d9f13SBarry Smith   if (nt != A->n) {
126429bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
126547b4a8eaSLois Curfman McInnes   }
1266e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1267273d9f13SBarry Smith   if (nt != A->m) {
126829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
126947b4a8eaSLois Curfman McInnes   }
127043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
127243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
127443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12753a40ed3dSBarry Smith   PetscFunctionReturn(0);
1276cee3aa6bSSatish Balay }
1277cee3aa6bSSatish Balay 
12784a2ae208SSatish Balay #undef __FUNCT__
12794a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1280dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1281cee3aa6bSSatish Balay {
1282cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1283dfbe8321SBarry Smith   PetscErrorCode ierr;
1284d64ed03dSBarry Smith 
1285d64ed03dSBarry Smith   PetscFunctionBegin;
128643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1287f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
128843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1289f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12903a40ed3dSBarry Smith   PetscFunctionReturn(0);
1291cee3aa6bSSatish Balay }
1292cee3aa6bSSatish Balay 
12934a2ae208SSatish Balay #undef __FUNCT__
12944a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1295dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1296cee3aa6bSSatish Balay {
1297cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1298dfbe8321SBarry Smith   PetscErrorCode ierr;
1299a5ff213dSBarry Smith   PetscTruth     merged;
1300cee3aa6bSSatish Balay 
1301d64ed03dSBarry Smith   PetscFunctionBegin;
1302a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1303cee3aa6bSSatish Balay   /* do nondiagonal part */
13047c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1305a5ff213dSBarry Smith   if (!merged) {
1306cee3aa6bSSatish Balay     /* send it on its way */
1307537820f0SBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1308cee3aa6bSSatish Balay     /* do local part */
13097c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1310cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1311a5ff213dSBarry Smith     /* inserted in yy until the next line */
1312639f9d9dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1313a5ff213dSBarry Smith   } else {
1314a5ff213dSBarry Smith     /* do local part */
1315a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1316a5ff213dSBarry Smith     /* send it on its way */
1317a5ff213dSBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1318a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1319a5ff213dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1320a5ff213dSBarry Smith   }
13213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1322cee3aa6bSSatish Balay }
1323cee3aa6bSSatish Balay 
13244a2ae208SSatish Balay #undef __FUNCT__
13254a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1326dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327cee3aa6bSSatish Balay {
1328cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329dfbe8321SBarry Smith   PetscErrorCode ierr;
1330cee3aa6bSSatish Balay 
1331d64ed03dSBarry Smith   PetscFunctionBegin;
1332cee3aa6bSSatish Balay   /* do nondiagonal part */
13337c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1334cee3aa6bSSatish Balay   /* send it on its way */
1335537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1336cee3aa6bSSatish Balay   /* do local part */
13377c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1339cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1340cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1341537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13423a40ed3dSBarry Smith   PetscFunctionReturn(0);
1343cee3aa6bSSatish Balay }
1344cee3aa6bSSatish Balay 
1345cee3aa6bSSatish Balay /*
1346cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1347cee3aa6bSSatish Balay    diagonal block
1348cee3aa6bSSatish Balay */
13494a2ae208SSatish Balay #undef __FUNCT__
13504a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1351dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1352cee3aa6bSSatish Balay {
1353cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1354dfbe8321SBarry Smith   PetscErrorCode ierr;
1355d64ed03dSBarry Smith 
1356d64ed03dSBarry Smith   PetscFunctionBegin;
1357273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
13583a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13593a40ed3dSBarry Smith   PetscFunctionReturn(0);
1360cee3aa6bSSatish Balay }
1361cee3aa6bSSatish Balay 
13624a2ae208SSatish Balay #undef __FUNCT__
13634a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1364f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1365cee3aa6bSSatish Balay {
1366cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1367dfbe8321SBarry Smith   PetscErrorCode ierr;
1368d64ed03dSBarry Smith 
1369d64ed03dSBarry Smith   PetscFunctionBegin;
1370f4df32b1SMatthew Knepley   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1371f4df32b1SMatthew Knepley   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
13723a40ed3dSBarry Smith   PetscFunctionReturn(0);
1373cee3aa6bSSatish Balay }
1374026e39d0SSatish Balay 
13754a2ae208SSatish Balay #undef __FUNCT__
13764a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1377b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1378acdf5bf4SSatish Balay {
1379acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
138087828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
13816849ba73SBarry Smith   PetscErrorCode ierr;
1382521d7252SBarry Smith   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1383b24ad042SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1384b24ad042SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1385acdf5bf4SSatish Balay 
1386d64ed03dSBarry Smith   PetscFunctionBegin;
1387abc0a331SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1388acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1389acdf5bf4SSatish Balay 
1390acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1391acdf5bf4SSatish Balay     /*
1392acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1393acdf5bf4SSatish Balay     */
1394acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1395b24ad042SBarry Smith     PetscInt     max = 1,mbs = mat->mbs,tmp;
1396bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1397acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1398acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1399acdf5bf4SSatish Balay     }
1400b24ad042SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1401b24ad042SBarry Smith     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1402acdf5bf4SSatish Balay   }
1403acdf5bf4SSatish Balay 
140429bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1405d9d09a02SSatish Balay   lrow = row - brstart;
1406acdf5bf4SSatish Balay 
1407acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1408acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1409acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1410f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1411f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1412acdf5bf4SSatish Balay   nztot = nzA + nzB;
1413acdf5bf4SSatish Balay 
1414acdf5bf4SSatish Balay   cmap  = mat->garray;
1415acdf5bf4SSatish Balay   if (v  || idx) {
1416acdf5bf4SSatish Balay     if (nztot) {
1417acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1418b24ad042SBarry Smith       PetscInt imark = -1;
1419acdf5bf4SSatish Balay       if (v) {
1420acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1421acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1422d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1423acdf5bf4SSatish Balay           else break;
1424acdf5bf4SSatish Balay         }
1425acdf5bf4SSatish Balay         imark = i;
1426acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1427acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1428acdf5bf4SSatish Balay       }
1429acdf5bf4SSatish Balay       if (idx) {
1430acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1431acdf5bf4SSatish Balay         if (imark > -1) {
1432acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1433bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1434acdf5bf4SSatish Balay           }
1435acdf5bf4SSatish Balay         } else {
1436acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1437d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1438d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1439acdf5bf4SSatish Balay             else break;
1440acdf5bf4SSatish Balay           }
1441acdf5bf4SSatish Balay           imark = i;
1442acdf5bf4SSatish Balay         }
1443d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1444d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1445acdf5bf4SSatish Balay       }
1446d64ed03dSBarry Smith     } else {
1447d212a18eSSatish Balay       if (idx) *idx = 0;
1448d212a18eSSatish Balay       if (v)   *v   = 0;
1449d212a18eSSatish Balay     }
1450acdf5bf4SSatish Balay   }
1451acdf5bf4SSatish Balay   *nz = nztot;
1452f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1453f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14543a40ed3dSBarry Smith   PetscFunctionReturn(0);
1455acdf5bf4SSatish Balay }
1456acdf5bf4SSatish Balay 
14574a2ae208SSatish Balay #undef __FUNCT__
14584a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1459b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1460acdf5bf4SSatish Balay {
1461acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1462d64ed03dSBarry Smith 
1463d64ed03dSBarry Smith   PetscFunctionBegin;
1464abc0a331SBarry Smith   if (!baij->getrowactive) {
146529bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1466acdf5bf4SSatish Balay   }
1467acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1469acdf5bf4SSatish Balay }
1470acdf5bf4SSatish Balay 
14714a2ae208SSatish Balay #undef __FUNCT__
14724a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
147458667388SSatish Balay {
147558667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1476dfbe8321SBarry Smith   PetscErrorCode ierr;
1477d64ed03dSBarry Smith 
1478d64ed03dSBarry Smith   PetscFunctionBegin;
147958667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
148058667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14813a40ed3dSBarry Smith   PetscFunctionReturn(0);
148258667388SSatish Balay }
14830ac07820SSatish Balay 
14844a2ae208SSatish Balay #undef __FUNCT__
14854a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14870ac07820SSatish Balay {
14884e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
14894e220ebcSLois Curfman McInnes   Mat            A = a->A,B = a->B;
1490dfbe8321SBarry Smith   PetscErrorCode ierr;
1491329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
14920ac07820SSatish Balay 
1493d64ed03dSBarry Smith   PetscFunctionBegin;
1494521d7252SBarry Smith   info->block_size     = (PetscReal)matin->bs;
14954e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14960e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14984e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14990e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15010ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15024e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15034e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15044e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15054e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15064e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15070ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1508d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
15094e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15104e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15114e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15124e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15134e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15140ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1515d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
15164e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15174e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15184e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15194e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15204e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1521d41123aaSBarry Smith   } else {
152277431f27SBarry Smith     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15230ac07820SSatish Balay   }
1524f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1525f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1526f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1527f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
15284e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15294e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15304e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15313a40ed3dSBarry Smith   PetscFunctionReturn(0);
15320ac07820SSatish Balay }
15330ac07820SSatish Balay 
15344a2ae208SSatish Balay #undef __FUNCT__
15354a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1536dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
153758667388SSatish Balay {
153858667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1539dfbe8321SBarry Smith   PetscErrorCode ierr;
154058667388SSatish Balay 
1541d64ed03dSBarry Smith   PetscFunctionBegin;
154212c028f9SKris Buschelman   switch (op) {
154312c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
154412c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
154512c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
154612c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
154712c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
154812c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
154912c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
155098305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155198305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
155212c028f9SKris Buschelman     break;
155312c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
15547c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
155598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
155712c028f9SKris Buschelman     break;
155812c028f9SKris Buschelman   case MAT_ROWS_SORTED:
155912c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
156012c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
1561*ae15b995SBarry Smith     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
156212c028f9SKris Buschelman     break;
156312c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
15647c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
156598305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
156698305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
156712c028f9SKris Buschelman     break;
156812c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
15697c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
157012c028f9SKris Buschelman     break;
157112c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
157229bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
157312c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
15747c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
157512c028f9SKris Buschelman     break;
157677e54ba9SKris Buschelman   case MAT_SYMMETRIC:
157777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
15782188ac68SBarry Smith   case MAT_HERMITIAN:
15792188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
15802188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
15812188ac68SBarry Smith     break;
15829a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
15839a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
15849a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
15859a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
158677e54ba9SKris Buschelman     break;
158712c028f9SKris Buschelman   default:
158829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1589d64ed03dSBarry Smith   }
15903a40ed3dSBarry Smith   PetscFunctionReturn(0);
159158667388SSatish Balay }
159258667388SSatish Balay 
15934a2ae208SSatish Balay #undef __FUNCT__
15944a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1595dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15960ac07820SSatish Balay {
15970ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
15980ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
15990ac07820SSatish Balay   Mat            B;
1600dfbe8321SBarry Smith   PetscErrorCode ierr;
1601b24ad042SBarry Smith   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1602521d7252SBarry Smith   PetscInt       bs=A->bs,mbs=baij->mbs;
16033eda8832SBarry Smith   MatScalar      *a;
16040ac07820SSatish Balay 
1605d64ed03dSBarry Smith   PetscFunctionBegin;
160629bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607f69a0ea3SMatthew Knepley   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1608f69a0ea3SMatthew Knepley   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1609f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1610521d7252SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
16110ac07820SSatish Balay 
16120ac07820SSatish Balay   /* copy over the A part */
16130ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16140ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1615b24ad042SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
16160ac07820SSatish Balay 
16170ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16180ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16190ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16200ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16210ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16220ac07820SSatish Balay       for (k=0; k<bs; k++) {
162393fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16240ac07820SSatish Balay         col++; a += bs;
16250ac07820SSatish Balay       }
16260ac07820SSatish Balay     }
16270ac07820SSatish Balay   }
16280ac07820SSatish Balay   /* copy over the B part */
16290ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16300ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16310ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16320ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16330ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16340ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16350ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16360ac07820SSatish Balay       for (k=0; k<bs; k++) {
163793fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16380ac07820SSatish Balay         col++; a += bs;
16390ac07820SSatish Balay       }
16400ac07820SSatish Balay     }
16410ac07820SSatish Balay   }
1642606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16430ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16440ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16450ac07820SSatish Balay 
16467c922b88SBarry Smith   if (matout) {
16470ac07820SSatish Balay     *matout = B;
16480ac07820SSatish Balay   } else {
1649273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
16500ac07820SSatish Balay   }
16513a40ed3dSBarry Smith   PetscFunctionReturn(0);
16520ac07820SSatish Balay }
16530e95ebc0SSatish Balay 
16544a2ae208SSatish Balay #undef __FUNCT__
16554a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1656dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16570e95ebc0SSatish Balay {
165836c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
165936c4a09eSSatish Balay   Mat            a = baij->A,b = baij->B;
1660dfbe8321SBarry Smith   PetscErrorCode ierr;
1661b24ad042SBarry Smith   PetscInt       s1,s2,s3;
16620e95ebc0SSatish Balay 
1663d64ed03dSBarry Smith   PetscFunctionBegin;
166436c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
166536c4a09eSSatish Balay   if (rr) {
166636c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
166729bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
166836c4a09eSSatish Balay     /* Overlap communication with computation. */
166936c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
167036c4a09eSSatish Balay   }
16710e95ebc0SSatish Balay   if (ll) {
16720e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
167329bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1674a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16750e95ebc0SSatish Balay   }
167636c4a09eSSatish Balay   /* scale  the diagonal block */
167736c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
167836c4a09eSSatish Balay 
167936c4a09eSSatish Balay   if (rr) {
168036c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
168136c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
168336c4a09eSSatish Balay   }
168436c4a09eSSatish Balay 
16853a40ed3dSBarry Smith   PetscFunctionReturn(0);
16860e95ebc0SSatish Balay }
16870e95ebc0SSatish Balay 
16884a2ae208SSatish Balay #undef __FUNCT__
16894a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1690f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
16910ac07820SSatish Balay {
16920ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16936849ba73SBarry Smith   PetscErrorCode ierr;
1694b24ad042SBarry Smith   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1695f4df32b1SMatthew Knepley   PetscInt       i,*owners = l->rowners;
1696b24ad042SBarry Smith   PetscInt       *nprocs,j,idx,nsends,row;
1697b24ad042SBarry Smith   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
16986543fbbaSBarry Smith   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1699521d7252SBarry Smith   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
17000ac07820SSatish Balay   MPI_Comm       comm = A->comm;
17010ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17020ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17036543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17046543fbbaSBarry Smith   PetscTruth     found = PETSC_FALSE;
17056543fbbaSBarry Smith #endif
17060ac07820SSatish Balay 
1707d64ed03dSBarry Smith   PetscFunctionBegin;
17080ac07820SSatish Balay   /*  first count number of contributors to each processor */
1709b24ad042SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1710b24ad042SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1711b24ad042SBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
17126543fbbaSBarry Smith   j = 0;
17130ac07820SSatish Balay   for (i=0; i<N; i++) {
17146543fbbaSBarry Smith     if (lastidx > (idx = rows[i])) j = 0;
17156543fbbaSBarry Smith     lastidx = idx;
17166543fbbaSBarry Smith     for (; j<size; j++) {
17170ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17186543fbbaSBarry Smith         nprocs[2*j]++;
17196543fbbaSBarry Smith         nprocs[2*j+1] = 1;
17206543fbbaSBarry Smith         owner[i] = j;
17216543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17226543fbbaSBarry Smith         found = PETSC_TRUE;
17236543fbbaSBarry Smith #endif
17246543fbbaSBarry Smith         break;
17250ac07820SSatish Balay       }
17260ac07820SSatish Balay     }
17276543fbbaSBarry Smith #if defined(PETSC_DEBUG)
172829bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
17296543fbbaSBarry Smith     found = PETSC_FALSE;
17306543fbbaSBarry Smith #endif
17310ac07820SSatish Balay   }
1732c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
17330ac07820SSatish Balay 
17340ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1735c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
17360ac07820SSatish Balay 
17370ac07820SSatish Balay   /* post receives:   */
1738b24ad042SBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1739b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
17400ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1741b24ad042SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17420ac07820SSatish Balay   }
17430ac07820SSatish Balay 
17440ac07820SSatish Balay   /* do sends:
17450ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17460ac07820SSatish Balay      the ith processor
17470ac07820SSatish Balay   */
1748b24ad042SBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1749b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1750b24ad042SBarry Smith   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
17510ac07820SSatish Balay   starts[0]  = 0;
1752c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17530ac07820SSatish Balay   for (i=0; i<N; i++) {
17540ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17550ac07820SSatish Balay   }
17560ac07820SSatish Balay 
17570ac07820SSatish Balay   starts[0] = 0;
1758c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
17590ac07820SSatish Balay   count = 0;
17600ac07820SSatish Balay   for (i=0; i<size; i++) {
1761c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1762b24ad042SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17630ac07820SSatish Balay     }
17640ac07820SSatish Balay   }
1765606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17660ac07820SSatish Balay 
17670ac07820SSatish Balay   base = owners[rank]*bs;
17680ac07820SSatish Balay 
17690ac07820SSatish Balay   /*  wait on receives */
1770b24ad042SBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
17710ac07820SSatish Balay   source = lens + nrecvs;
17720ac07820SSatish Balay   count  = nrecvs; slen = 0;
17730ac07820SSatish Balay   while (count) {
1774ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17750ac07820SSatish Balay     /* unpack receives into our local space */
1776b24ad042SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
17770ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17780ac07820SSatish Balay     lens[imdex]    = n;
17790ac07820SSatish Balay     slen          += n;
17800ac07820SSatish Balay     count--;
17810ac07820SSatish Balay   }
1782606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17830ac07820SSatish Balay 
17840ac07820SSatish Balay   /* move the data into the send scatter */
1785b24ad042SBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
17860ac07820SSatish Balay   count = 0;
17870ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17880ac07820SSatish Balay     values = rvalues + i*nmax;
17890ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17900ac07820SSatish Balay       lrows[count++] = values[j] - base;
17910ac07820SSatish Balay     }
17920ac07820SSatish Balay   }
1793606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1794606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1795606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1796606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17970ac07820SSatish Balay 
17980ac07820SSatish Balay   /* actually zap the local rows */
179972dacd9aSBarry Smith   /*
180072dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1801a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
180272dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
180372dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
180472dacd9aSBarry Smith 
1805f4df32b1SMatthew Knepley        Contributed by: Matthew Knepley
180672dacd9aSBarry Smith   */
18079c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1808f4df32b1SMatthew Knepley   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1809f4df32b1SMatthew Knepley   if ((diag != 0.0) && (l->A->M == l->A->N)) {
1810f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1811f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
1812f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1813fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
181429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1815fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18166525c446SSatish Balay     }
1817a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1818a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1819f4df32b1SMatthew Knepley       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1820a07cd24cSSatish Balay     }
1821a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18239c957beeSSatish Balay   } else {
1824f4df32b1SMatthew Knepley     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1825a07cd24cSSatish Balay   }
18269c957beeSSatish Balay 
1827606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1828a07cd24cSSatish Balay 
18290ac07820SSatish Balay   /* wait on sends */
18300ac07820SSatish Balay   if (nsends) {
183182502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1832ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1833606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
18340ac07820SSatish Balay   }
1835606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1836606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18370ac07820SSatish Balay 
18383a40ed3dSBarry Smith   PetscFunctionReturn(0);
18390ac07820SSatish Balay }
184072dacd9aSBarry Smith 
18414a2ae208SSatish Balay #undef __FUNCT__
18424a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1843dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1844ba4ca20aSSatish Balay {
1845ba4ca20aSSatish Balay   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
184625fdafccSSatish Balay   MPI_Comm          comm = A->comm;
1847b24ad042SBarry Smith   static PetscTruth called = PETSC_FALSE;
1848dfbe8321SBarry Smith   PetscErrorCode    ierr;
1849ba4ca20aSSatish Balay 
1850d64ed03dSBarry Smith   PetscFunctionBegin;
18513a40ed3dSBarry Smith   if (!a->rank) {
18523a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
185325fdafccSSatish Balay   }
1854b24ad042SBarry Smith   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1855d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1856d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1858ba4ca20aSSatish Balay }
18590ac07820SSatish Balay 
18604a2ae208SSatish Balay #undef __FUNCT__
18614a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1862dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1863bb5a7306SBarry Smith {
1864bb5a7306SBarry Smith   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1865dfbe8321SBarry Smith   PetscErrorCode ierr;
1866d64ed03dSBarry Smith 
1867d64ed03dSBarry Smith   PetscFunctionBegin;
1868bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18693a40ed3dSBarry Smith   PetscFunctionReturn(0);
1870bb5a7306SBarry Smith }
1871bb5a7306SBarry Smith 
18726849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18730ac07820SSatish Balay 
18744a2ae208SSatish Balay #undef __FUNCT__
18754a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1876dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18777fc3c18eSBarry Smith {
18787fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18797fc3c18eSBarry Smith   Mat            a,b,c,d;
18807fc3c18eSBarry Smith   PetscTruth     flg;
1881dfbe8321SBarry Smith   PetscErrorCode ierr;
18827fc3c18eSBarry Smith 
18837fc3c18eSBarry Smith   PetscFunctionBegin;
18847fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18857fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18867fc3c18eSBarry Smith 
18877fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1888abc0a331SBarry Smith   if (flg) {
18897fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18907fc3c18eSBarry Smith   }
18917fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18927fc3c18eSBarry Smith   PetscFunctionReturn(0);
18937fc3c18eSBarry Smith }
18947fc3c18eSBarry Smith 
18953c896bc6SHong Zhang #undef __FUNCT__
18963c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ"
18973c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
18983c896bc6SHong Zhang {
18993c896bc6SHong Zhang   PetscErrorCode ierr;
19003c896bc6SHong Zhang   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
19013c896bc6SHong Zhang   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
19023c896bc6SHong Zhang 
19033c896bc6SHong Zhang   PetscFunctionBegin;
19043c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
19053c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
19063c896bc6SHong Zhang     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
19073c896bc6SHong Zhang   } else {
19083c896bc6SHong Zhang     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
19093c896bc6SHong Zhang     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
19103c896bc6SHong Zhang   }
19113c896bc6SHong Zhang   PetscFunctionReturn(0);
19123c896bc6SHong Zhang }
1913273d9f13SBarry Smith 
19144a2ae208SSatish Balay #undef __FUNCT__
19154a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1916dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1917273d9f13SBarry Smith {
1918dfbe8321SBarry Smith   PetscErrorCode ierr;
1919273d9f13SBarry Smith 
1920273d9f13SBarry Smith   PetscFunctionBegin;
1921273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1922273d9f13SBarry Smith   PetscFunctionReturn(0);
1923273d9f13SBarry Smith }
1924273d9f13SBarry Smith 
19254fe895cdSHong Zhang #include "petscblaslapack.h"
19264fe895cdSHong Zhang #undef __FUNCT__
19274fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ"
19284fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
19294fe895cdSHong Zhang {
19304fe895cdSHong Zhang   PetscErrorCode ierr;
19314fe895cdSHong Zhang   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
19324fe895cdSHong Zhang   PetscBLASInt   bnz,one=1;
19334fe895cdSHong Zhang   Mat_SeqBAIJ    *x,*y;
19344fe895cdSHong Zhang 
19354fe895cdSHong Zhang   PetscFunctionBegin;
19364fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
19374fe895cdSHong Zhang     PetscScalar alpha = a;
19384fe895cdSHong Zhang     x = (Mat_SeqBAIJ *)xx->A->data;
19394fe895cdSHong Zhang     y = (Mat_SeqBAIJ *)yy->A->data;
19404fe895cdSHong Zhang     bnz = (PetscBLASInt)x->nz;
19414fe895cdSHong Zhang     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
19424fe895cdSHong Zhang     x = (Mat_SeqBAIJ *)xx->B->data;
19434fe895cdSHong Zhang     y = (Mat_SeqBAIJ *)yy->B->data;
19444fe895cdSHong Zhang     bnz = (PetscBLASInt)x->nz;
19454fe895cdSHong Zhang     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
19464fe895cdSHong Zhang   } else {
19474fe895cdSHong Zhang     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
19484fe895cdSHong Zhang   }
19494fe895cdSHong Zhang   PetscFunctionReturn(0);
19504fe895cdSHong Zhang }
19514fe895cdSHong Zhang 
195299cafbc1SBarry Smith #undef __FUNCT__
195399cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ"
195499cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
195599cafbc1SBarry Smith {
195699cafbc1SBarry Smith   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
195799cafbc1SBarry Smith   PetscErrorCode ierr;
195899cafbc1SBarry Smith 
195999cafbc1SBarry Smith   PetscFunctionBegin;
196099cafbc1SBarry Smith   ierr = MatRealPart(a->A);CHKERRQ(ierr);
196199cafbc1SBarry Smith   ierr = MatRealPart(a->B);CHKERRQ(ierr);
196299cafbc1SBarry Smith   PetscFunctionReturn(0);
196399cafbc1SBarry Smith }
196499cafbc1SBarry Smith 
196599cafbc1SBarry Smith #undef __FUNCT__
196699cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
196799cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
196899cafbc1SBarry Smith {
196999cafbc1SBarry Smith   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
197099cafbc1SBarry Smith   PetscErrorCode ierr;
197199cafbc1SBarry Smith 
197299cafbc1SBarry Smith   PetscFunctionBegin;
197399cafbc1SBarry Smith   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
197499cafbc1SBarry Smith   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
197599cafbc1SBarry Smith   PetscFunctionReturn(0);
197699cafbc1SBarry Smith }
197799cafbc1SBarry Smith 
197879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1979cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1980cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1981cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1982cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1983cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
198497304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
19857c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
19867c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1987cc2dc46cSBarry Smith        0,
1988cc2dc46cSBarry Smith        0,
1989cc2dc46cSBarry Smith        0,
199097304618SKris Buschelman /*10*/ 0,
1991cc2dc46cSBarry Smith        0,
1992cc2dc46cSBarry Smith        0,
1993cc2dc46cSBarry Smith        0,
1994cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
199597304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
19967fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
1997cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
1998cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
1999cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
200097304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
2001cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
2002cc2dc46cSBarry Smith        0,
2003cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
2004cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
200597304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
2006cc2dc46cSBarry Smith        0,
2007cc2dc46cSBarry Smith        0,
2008cc2dc46cSBarry Smith        0,
2009cc2dc46cSBarry Smith        0,
201097304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
2011273d9f13SBarry Smith        0,
2012cc2dc46cSBarry Smith        0,
2013cc2dc46cSBarry Smith        0,
2014cc2dc46cSBarry Smith        0,
201597304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
2016cc2dc46cSBarry Smith        0,
2017cc2dc46cSBarry Smith        0,
2018cc2dc46cSBarry Smith        0,
2019cc2dc46cSBarry Smith        0,
20204fe895cdSHong Zhang /*40*/ MatAXPY_MPIBAIJ,
2021cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
2022cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
2023cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
20243c896bc6SHong Zhang        MatCopy_MPIBAIJ,
202597304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
2026cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
2027cc2dc46cSBarry Smith        0,
2028cc2dc46cSBarry Smith        0,
2029cc2dc46cSBarry Smith        0,
2030521d7252SBarry Smith /*50*/ 0,
2031cc2dc46cSBarry Smith        0,
2032cc2dc46cSBarry Smith        0,
2033cc2dc46cSBarry Smith        0,
2034cc2dc46cSBarry Smith        0,
203597304618SKris Buschelman /*55*/ 0,
2036cc2dc46cSBarry Smith        0,
2037cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
2038cc2dc46cSBarry Smith        0,
2039cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
204097304618SKris Buschelman /*60*/ 0,
2041f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
2042f14a1c24SBarry Smith        MatView_MPIBAIJ,
20438a124369SBarry Smith        MatGetPetscMaps_Petsc,
20447843d17aSBarry Smith        0,
204597304618SKris Buschelman /*65*/ 0,
20467843d17aSBarry Smith        0,
20477843d17aSBarry Smith        0,
20487843d17aSBarry Smith        0,
20497843d17aSBarry Smith        0,
205097304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
20517843d17aSBarry Smith        0,
205297304618SKris Buschelman        0,
205397304618SKris Buschelman        0,
205497304618SKris Buschelman        0,
205597304618SKris Buschelman /*75*/ 0,
205697304618SKris Buschelman        0,
205797304618SKris Buschelman        0,
205897304618SKris Buschelman        0,
205997304618SKris Buschelman        0,
206097304618SKris Buschelman /*80*/ 0,
206197304618SKris Buschelman        0,
206297304618SKris Buschelman        0,
206397304618SKris Buschelman        0,
2064865e5f61SKris Buschelman        MatLoad_MPIBAIJ,
2065865e5f61SKris Buschelman /*85*/ 0,
2066865e5f61SKris Buschelman        0,
2067865e5f61SKris Buschelman        0,
2068865e5f61SKris Buschelman        0,
2069865e5f61SKris Buschelman        0,
2070865e5f61SKris Buschelman /*90*/ 0,
2071865e5f61SKris Buschelman        0,
2072865e5f61SKris Buschelman        0,
2073865e5f61SKris Buschelman        0,
2074865e5f61SKris Buschelman        0,
2075865e5f61SKris Buschelman /*95*/ 0,
2076865e5f61SKris Buschelman        0,
2077865e5f61SKris Buschelman        0,
207899cafbc1SBarry Smith        0,
207999cafbc1SBarry Smith        0,
208099cafbc1SBarry Smith /*100*/0,
208199cafbc1SBarry Smith        0,
208299cafbc1SBarry Smith        0,
208399cafbc1SBarry Smith        0,
208499cafbc1SBarry Smith        0,
208599cafbc1SBarry Smith /*105*/0,
208699cafbc1SBarry Smith        MatRealPart_MPIBAIJ,
208799cafbc1SBarry Smith        MatImaginaryPart_MPIBAIJ};
208879bdfe76SSatish Balay 
20895ef9f2a5SBarry Smith 
2090e18c124aSSatish Balay EXTERN_C_BEGIN
20914a2ae208SSatish Balay #undef __FUNCT__
20924a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2093be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20945ef9f2a5SBarry Smith {
20955ef9f2a5SBarry Smith   PetscFunctionBegin;
20965ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20975ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
20985ef9f2a5SBarry Smith   PetscFunctionReturn(0);
20995ef9f2a5SBarry Smith }
2100e18c124aSSatish Balay EXTERN_C_END
210179bdfe76SSatish Balay 
2102273d9f13SBarry Smith EXTERN_C_BEGIN
2103f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2104d94109b8SHong Zhang EXTERN_C_END
2105d94109b8SHong Zhang 
2106aac34f13SBarry Smith #undef __FUNCT__
2107aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2108aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2109aac34f13SBarry Smith {
2110aac34f13SBarry Smith   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2111aac34f13SBarry Smith   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2112aac34f13SBarry Smith   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2113aac34f13SBarry Smith   const PetscInt *JJ;
2114aac34f13SBarry Smith   PetscScalar    *values;
2115aac34f13SBarry Smith   PetscErrorCode ierr;
2116aac34f13SBarry Smith 
2117aac34f13SBarry Smith   PetscFunctionBegin;
2118aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2119aac34f13SBarry Smith   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2120aac34f13SBarry Smith #endif
2121aac34f13SBarry Smith   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2122aac34f13SBarry Smith   o_nnz = d_nnz + m;
2123aac34f13SBarry Smith 
2124aac34f13SBarry Smith   for (i=0; i<m; i++) {
2125aac34f13SBarry Smith     nnz     = I[i+1]- I[i];
2126aac34f13SBarry Smith     JJ      = J + I[i];
2127aac34f13SBarry Smith     nnz_max = PetscMax(nnz_max,nnz);
2128aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2129aac34f13SBarry Smith     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2130aac34f13SBarry Smith #endif
2131aac34f13SBarry Smith     for (j=0; j<nnz; j++) {
2132aac34f13SBarry Smith       if (*JJ >= cstart) break;
2133aac34f13SBarry Smith       JJ++;
2134aac34f13SBarry Smith     }
2135aac34f13SBarry Smith     d = 0;
2136aac34f13SBarry Smith     for (; j<nnz; j++) {
2137aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2138aac34f13SBarry Smith       d++;
2139aac34f13SBarry Smith     }
2140aac34f13SBarry Smith     d_nnz[i] = d;
2141aac34f13SBarry Smith     o_nnz[i] = nnz - d;
2142aac34f13SBarry Smith   }
2143aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2144aac34f13SBarry Smith   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2145aac34f13SBarry Smith 
2146aac34f13SBarry Smith   if (v) values = (PetscScalar*)v;
2147aac34f13SBarry Smith   else {
2148aac34f13SBarry Smith     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2149aac34f13SBarry Smith     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2150aac34f13SBarry Smith   }
2151aac34f13SBarry Smith 
2152aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2153aac34f13SBarry Smith   for (i=0; i<m; i++) {
2154aac34f13SBarry Smith     ii   = i + rstart;
2155aac34f13SBarry Smith     nnz  = I[i+1]- I[i];
21563f01e284SBarry Smith     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2157aac34f13SBarry Smith   }
2158aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2161aac34f13SBarry Smith 
2162aac34f13SBarry Smith   if (!v) {
2163aac34f13SBarry Smith     ierr = PetscFree(values);CHKERRQ(ierr);
2164aac34f13SBarry Smith   }
2165aac34f13SBarry Smith   PetscFunctionReturn(0);
2166aac34f13SBarry Smith }
2167aac34f13SBarry Smith 
2168aac34f13SBarry Smith #undef __FUNCT__
2169aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2170aac34f13SBarry Smith /*@C
2171aac34f13SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2172aac34f13SBarry Smith    (the default parallel PETSc format).
2173aac34f13SBarry Smith 
2174aac34f13SBarry Smith    Collective on MPI_Comm
2175aac34f13SBarry Smith 
2176aac34f13SBarry Smith    Input Parameters:
2177aac34f13SBarry Smith +  A - the matrix
2178aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2179aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2180aac34f13SBarry Smith -  v - optional values in the matrix
2181aac34f13SBarry Smith 
2182aac34f13SBarry Smith    Level: developer
2183aac34f13SBarry Smith 
2184aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2185aac34f13SBarry Smith 
2186aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2187aac34f13SBarry Smith @*/
2188be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2189aac34f13SBarry Smith {
2190aac34f13SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2191aac34f13SBarry Smith 
2192aac34f13SBarry Smith   PetscFunctionBegin;
2193aac34f13SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2194aac34f13SBarry Smith   if (f) {
2195aac34f13SBarry Smith     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2196aac34f13SBarry Smith   }
2197aac34f13SBarry Smith   PetscFunctionReturn(0);
2198aac34f13SBarry Smith }
2199aac34f13SBarry Smith 
2200d94109b8SHong Zhang EXTERN_C_BEGIN
22014a2ae208SSatish Balay #undef __FUNCT__
2202a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2203be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2204a23d5eceSKris Buschelman {
2205a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2206dfbe8321SBarry Smith   PetscErrorCode ierr;
2207b24ad042SBarry Smith   PetscInt       i;
2208a23d5eceSKris Buschelman 
2209a23d5eceSKris Buschelman   PetscFunctionBegin;
2210a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2211a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2212a23d5eceSKris Buschelman 
2213a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2214a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2215a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
221677431f27SBarry Smith   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
221777431f27SBarry Smith   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2218a23d5eceSKris Buschelman   if (d_nnz) {
2219a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
222077431f27SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2221a23d5eceSKris Buschelman     }
2222a23d5eceSKris Buschelman   }
2223a23d5eceSKris Buschelman   if (o_nnz) {
2224a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
222577431f27SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2226a23d5eceSKris Buschelman     }
2227a23d5eceSKris Buschelman   }
2228a23d5eceSKris Buschelman 
2229a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2230a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2231a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2232a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2233a23d5eceSKris Buschelman 
2234a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2235521d7252SBarry Smith   B->bs  = bs;
2236a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2237a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2238a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2239a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2240a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2241a23d5eceSKris Buschelman 
2242b24ad042SBarry Smith   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2243a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2244a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2245a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2246a23d5eceSKris Buschelman   }
2247a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2248a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2249a23d5eceSKris Buschelman 
2250b24ad042SBarry Smith   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2251a23d5eceSKris Buschelman   b->cowners[0] = 0;
2252a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2253a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2254a23d5eceSKris Buschelman   }
2255a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2256a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2257a23d5eceSKris Buschelman 
2258a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2259a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2260a23d5eceSKris Buschelman   }
2261a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2262a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2263a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2264a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2265a23d5eceSKris Buschelman 
2266f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2267f69a0ea3SMatthew Knepley   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
22689c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2269c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
227052e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2271f69a0ea3SMatthew Knepley   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2272f69a0ea3SMatthew Knepley   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
22739c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2274c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
227552e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2276c60e587dSKris Buschelman 
2277a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2278a23d5eceSKris Buschelman 
2279a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2280a23d5eceSKris Buschelman }
2281a23d5eceSKris Buschelman EXTERN_C_END
2282a23d5eceSKris Buschelman 
2283a23d5eceSKris Buschelman EXTERN_C_BEGIN
2284be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2285be1d678aSKris Buschelman EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
228692b32695SKris Buschelman EXTERN_C_END
22875bf65638SKris Buschelman 
22880bad9183SKris Buschelman /*MC
2289fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
22900bad9183SKris Buschelman 
22910bad9183SKris Buschelman    Options Database Keys:
22920bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
22930bad9183SKris Buschelman 
22940bad9183SKris Buschelman   Level: beginner
22950bad9183SKris Buschelman 
22960bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
22970bad9183SKris Buschelman M*/
22980bad9183SKris Buschelman 
229992b32695SKris Buschelman EXTERN_C_BEGIN
2300a23d5eceSKris Buschelman #undef __FUNCT__
23014a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2302be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2303273d9f13SBarry Smith {
2304273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2305dfbe8321SBarry Smith   PetscErrorCode ierr;
2306273d9f13SBarry Smith   PetscTruth     flg;
2307273d9f13SBarry Smith 
2308273d9f13SBarry Smith   PetscFunctionBegin;
230982502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
231082502324SSatish Balay   B->data = (void*)b;
231182502324SSatish Balay 
2312085a36d4SBarry Smith 
2313273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2314273d9f13SBarry Smith   B->mapping    = 0;
2315273d9f13SBarry Smith   B->factor     = 0;
2316273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2317273d9f13SBarry Smith 
2318273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2319273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2320273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2321273d9f13SBarry Smith 
2322273d9f13SBarry Smith   /* build local table of row and column ownerships */
2323b24ad042SBarry Smith   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
232452e6d16bSBarry Smith   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2325273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2326273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2327273d9f13SBarry Smith 
2328273d9f13SBarry Smith   /* build cache for off array entries formed */
2329273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2330273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2331273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2332273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2333273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2334273d9f13SBarry Smith 
2335cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2336273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
233764a35ccbSBarry Smith   b->setvalueslen     = 0;
2338273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2339273d9f13SBarry Smith #endif
2340273d9f13SBarry Smith 
2341273d9f13SBarry Smith   /* stuff used in block assembly */
2342273d9f13SBarry Smith   b->barray       = 0;
2343273d9f13SBarry Smith 
2344273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2345273d9f13SBarry Smith   b->lvec         = 0;
2346273d9f13SBarry Smith   b->Mvctx        = 0;
2347273d9f13SBarry Smith 
2348273d9f13SBarry Smith   /* stuff for MatGetRow() */
2349273d9f13SBarry Smith   b->rowindices   = 0;
2350273d9f13SBarry Smith   b->rowvalues    = 0;
2351273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2352273d9f13SBarry Smith 
2353273d9f13SBarry Smith   /* hash table stuff */
2354273d9f13SBarry Smith   b->ht           = 0;
2355273d9f13SBarry Smith   b->hd           = 0;
2356273d9f13SBarry Smith   b->ht_size      = 0;
2357273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2358273d9f13SBarry Smith   b->ht_fact      = 0;
2359273d9f13SBarry Smith   b->ht_total_ct  = 0;
2360273d9f13SBarry Smith   b->ht_insert_ct = 0;
2361273d9f13SBarry Smith 
2362b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2363273d9f13SBarry Smith   if (flg) {
2364f6275e2eSBarry Smith     PetscReal fact = 1.39;
2365273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
236687828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2367273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2368273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2369*ae15b995SBarry Smith     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2370273d9f13SBarry Smith   }
2371273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2372273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2373273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2374273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2375273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2376273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2377273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2378273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2379273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2380a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2381a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2382a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2383aac34f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2384aac34f13SBarry Smith 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2385aac34f13SBarry Smith 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
238692b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
238792b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
238892b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
23895bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
23905bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
23915bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2392273d9f13SBarry Smith   PetscFunctionReturn(0);
2393273d9f13SBarry Smith }
2394273d9f13SBarry Smith EXTERN_C_END
2395273d9f13SBarry Smith 
2396209238afSKris Buschelman /*MC
2397002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2398209238afSKris Buschelman 
2399209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2400209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2401209238afSKris Buschelman 
2402209238afSKris Buschelman    Options Database Keys:
2403209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2404209238afSKris Buschelman 
2405209238afSKris Buschelman   Level: beginner
2406209238afSKris Buschelman 
2407aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2408209238afSKris Buschelman M*/
2409209238afSKris Buschelman 
2410209238afSKris Buschelman EXTERN_C_BEGIN
2411209238afSKris Buschelman #undef __FUNCT__
2412209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2413be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2414dfbe8321SBarry Smith {
24156849ba73SBarry Smith   PetscErrorCode ierr;
2416b24ad042SBarry Smith   PetscMPIInt    size;
2417209238afSKris Buschelman 
2418209238afSKris Buschelman   PetscFunctionBegin;
2419209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2420209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2421209238afSKris Buschelman   if (size == 1) {
2422209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2423209238afSKris Buschelman   } else {
2424209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2425209238afSKris Buschelman   }
2426209238afSKris Buschelman   PetscFunctionReturn(0);
2427209238afSKris Buschelman }
2428209238afSKris Buschelman EXTERN_C_END
2429209238afSKris Buschelman 
24304a2ae208SSatish Balay #undef __FUNCT__
24314a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2432273d9f13SBarry Smith /*@C
2433aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2434273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2435273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2436273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2437273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2438273d9f13SBarry Smith 
2439273d9f13SBarry Smith    Collective on Mat
2440273d9f13SBarry Smith 
2441273d9f13SBarry Smith    Input Parameters:
2442273d9f13SBarry Smith +  A - the matrix
2443273d9f13SBarry Smith .  bs   - size of blockk
2444273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2445273d9f13SBarry Smith            submatrix  (same for all local rows)
2446273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2447273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2448273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2449273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2450273d9f13SBarry Smith            submatrix (same for all local rows).
2451273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2452273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2453273d9f13SBarry Smith            each block row) or PETSC_NULL.
2454273d9f13SBarry Smith 
245549a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
2456273d9f13SBarry Smith 
2457273d9f13SBarry Smith    Options Database Keys:
2458273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2459273d9f13SBarry Smith                      block calculations (much slower)
2460273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2461273d9f13SBarry Smith 
2462273d9f13SBarry Smith    Notes:
2463273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2464273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2465273d9f13SBarry Smith 
2466273d9f13SBarry Smith    Storage Information:
2467273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2468273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2469273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2470273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2471273d9f13SBarry Smith 
2472273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2473273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2474273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2475273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2476273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2477273d9f13SBarry Smith 
2478273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2479273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2480273d9f13SBarry Smith 
2481273d9f13SBarry Smith .vb
2482273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2483273d9f13SBarry Smith           -------------------
2484273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2485273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2486273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2487273d9f13SBarry Smith           -------------------
2488273d9f13SBarry Smith .ve
2489273d9f13SBarry Smith 
2490273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2491273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2492273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2493273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2494273d9f13SBarry Smith 
2495273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2496273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2497273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2498273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2499273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2500273d9f13SBarry Smith    matrices.
2501273d9f13SBarry Smith 
2502273d9f13SBarry Smith    Level: intermediate
2503273d9f13SBarry Smith 
2504273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2505273d9f13SBarry Smith 
2506aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2507273d9f13SBarry Smith @*/
2508be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2509273d9f13SBarry Smith {
2510b24ad042SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2511273d9f13SBarry Smith 
2512273d9f13SBarry Smith   PetscFunctionBegin;
2513a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2514a23d5eceSKris Buschelman   if (f) {
2515a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2516273d9f13SBarry Smith   }
2517273d9f13SBarry Smith   PetscFunctionReturn(0);
2518273d9f13SBarry Smith }
2519273d9f13SBarry Smith 
25204a2ae208SSatish Balay #undef __FUNCT__
25214a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
252279bdfe76SSatish Balay /*@C
252379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
252479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
252579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
252679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
252779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
252879bdfe76SSatish Balay 
2529db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2530db81eaa0SLois Curfman McInnes 
253179bdfe76SSatish Balay    Input Parameters:
2532db81eaa0SLois Curfman McInnes +  comm - MPI communicator
253379bdfe76SSatish Balay .  bs   - size of blockk
253479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
253592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
253792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
253892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2540be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2541be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
254247a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
254379bdfe76SSatish Balay            submatrix  (same for all local rows)
254447a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
254592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2546db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
254747a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
254879bdfe76SSatish Balay            submatrix (same for all local rows).
254947a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
255092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
255192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
255279bdfe76SSatish Balay 
255379bdfe76SSatish Balay    Output Parameter:
255479bdfe76SSatish Balay .  A - the matrix
255579bdfe76SSatish Balay 
2556db81eaa0SLois Curfman McInnes    Options Database Keys:
2557db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2558db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2559db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
25603ffaccefSLois Curfman McInnes 
2561b259b22eSLois Curfman McInnes    Notes:
256249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
256349a6f317SBarry Smith 
256447a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
256547a75d0bSBarry Smith 
256679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
256779bdfe76SSatish Balay    (possibly both).
256879bdfe76SSatish Balay 
2569be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2570be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2571be79a94dSBarry Smith 
257279bdfe76SSatish Balay    Storage Information:
257379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
257479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
257579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
257679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
257779bdfe76SSatish Balay 
257879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
257979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
258079bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
258179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
258279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
258379bdfe76SSatish Balay 
258479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
258579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
258679bdfe76SSatish Balay 
2587db81eaa0SLois Curfman McInnes .vb
2588db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2589db81eaa0SLois Curfman McInnes           -------------------
2590db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2591db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2592db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2593db81eaa0SLois Curfman McInnes           -------------------
2594db81eaa0SLois Curfman McInnes .ve
259579bdfe76SSatish Balay 
259679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
259779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
259879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
259957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
260079bdfe76SSatish Balay 
2601d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2602d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
260379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
260492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
260592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
26066da5968aSLois Curfman McInnes    matrices.
260779bdfe76SSatish Balay 
2608027ccd11SLois Curfman McInnes    Level: intermediate
2609027ccd11SLois Curfman McInnes 
261092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
261179bdfe76SSatish Balay 
2612aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
261379bdfe76SSatish Balay @*/
2614be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
261579bdfe76SSatish Balay {
26166849ba73SBarry Smith   PetscErrorCode ierr;
2617b24ad042SBarry Smith   PetscMPIInt    size;
261879bdfe76SSatish Balay 
2619d64ed03dSBarry Smith   PetscFunctionBegin;
2620f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2621f69a0ea3SMatthew Knepley   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2622d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2623273d9f13SBarry Smith   if (size > 1) {
2624273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2625273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2626273d9f13SBarry Smith   } else {
2627273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2628273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
26293914022bSBarry Smith   }
26303a40ed3dSBarry Smith   PetscFunctionReturn(0);
263179bdfe76SSatish Balay }
2632026e39d0SSatish Balay 
26334a2ae208SSatish Balay #undef __FUNCT__
26344a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
26356849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
26360ac07820SSatish Balay {
26370ac07820SSatish Balay   Mat            mat;
26380ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2639dfbe8321SBarry Smith   PetscErrorCode ierr;
2640b24ad042SBarry Smith   PetscInt       len=0;
26410ac07820SSatish Balay 
2642d64ed03dSBarry Smith   PetscFunctionBegin;
26430ac07820SSatish Balay   *newmat       = 0;
2644f69a0ea3SMatthew Knepley   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2645f69a0ea3SMatthew Knepley   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
2646be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
26471d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
26487fff6886SHong Zhang 
26494beb1cfeSHong Zhang   mat->factor       = matin->factor;
2650273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
26510ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
26527fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
26537fff6886SHong Zhang 
2654273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
2655521d7252SBarry Smith   mat->bs  = matin->bs;
26560ac07820SSatish Balay   a->bs2   = oldmat->bs2;
26570ac07820SSatish Balay   a->mbs   = oldmat->mbs;
26580ac07820SSatish Balay   a->nbs   = oldmat->nbs;
26590ac07820SSatish Balay   a->Mbs   = oldmat->Mbs;
26600ac07820SSatish Balay   a->Nbs   = oldmat->Nbs;
26610ac07820SSatish Balay 
26620ac07820SSatish Balay   a->rstart       = oldmat->rstart;
26630ac07820SSatish Balay   a->rend         = oldmat->rend;
26640ac07820SSatish Balay   a->cstart       = oldmat->cstart;
26650ac07820SSatish Balay   a->cend         = oldmat->cend;
26660ac07820SSatish Balay   a->size         = oldmat->size;
26670ac07820SSatish Balay   a->rank         = oldmat->rank;
2668aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2669aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2670aef5e8e0SSatish Balay   a->rowindices   = 0;
26710ac07820SSatish Balay   a->rowvalues    = 0;
26720ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
267330793edcSSatish Balay   a->barray       = 0;
26743123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
26753123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
26763123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
26773123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
26780ac07820SSatish Balay 
2679133cdb44SSatish Balay   /* hash table stuff */
2680133cdb44SSatish Balay   a->ht           = 0;
2681133cdb44SSatish Balay   a->hd           = 0;
2682133cdb44SSatish Balay   a->ht_size      = 0;
2683133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
268425fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2685133cdb44SSatish Balay   a->ht_total_ct  = 0;
2686133cdb44SSatish Balay   a->ht_insert_ct = 0;
2687133cdb44SSatish Balay 
2688b24ad042SBarry Smith   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
26898798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2690521d7252SBarry Smith   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
26910ac07820SSatish Balay   if (oldmat->colmap) {
2692aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
26930f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
269448e59246SSatish Balay #else
2695b24ad042SBarry Smith   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
269652e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2697b24ad042SBarry Smith   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
269848e59246SSatish Balay #endif
26990ac07820SSatish Balay   } else a->colmap = 0;
27004beb1cfeSHong Zhang 
27010ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2702b24ad042SBarry Smith     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
270352e6d16bSBarry Smith     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2704b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
27050ac07820SSatish Balay   } else a->garray = 0;
27060ac07820SSatish Balay 
27070ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
270852e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
27090ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
271052e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
27117fff6886SHong Zhang 
27122e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
271352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
27142e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
271552e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2716b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
27170ac07820SSatish Balay   *newmat = mat;
27184beb1cfeSHong Zhang 
27193a40ed3dSBarry Smith   PetscFunctionReturn(0);
27200ac07820SSatish Balay }
272157b952d6SSatish Balay 
2722e090d566SSatish Balay #include "petscsys.h"
272357b952d6SSatish Balay 
27244a2ae208SSatish Balay #undef __FUNCT__
27254a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2726f69a0ea3SMatthew Knepley PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
272757b952d6SSatish Balay {
272857b952d6SSatish Balay   Mat            A;
27296849ba73SBarry Smith   PetscErrorCode ierr;
2730b24ad042SBarry Smith   int            fd;
2731b24ad042SBarry Smith   PetscInt       i,nz,j,rstart,rend;
273287828ca2SBarry Smith   PetscScalar    *vals,*buf;
273357b952d6SSatish Balay   MPI_Comm       comm = ((PetscObject)viewer)->comm;
273457b952d6SSatish Balay   MPI_Status     status;
2735b24ad042SBarry Smith   PetscMPIInt    rank,size,maxnz;
2736b24ad042SBarry Smith   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2737dc231df0SBarry Smith   PetscInt       *locrowlens,*procsnz = 0,*browners;
2738167e7480SBarry Smith   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2739dc231df0SBarry Smith   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2740b24ad042SBarry Smith   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2741dc231df0SBarry Smith   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
274257b952d6SSatish Balay 
2743d64ed03dSBarry Smith   PetscFunctionBegin;
2744b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
274557b952d6SSatish Balay 
2746d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2747d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
274857b952d6SSatish Balay   if (!rank) {
2749b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2750e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2751552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
27526c5fab8fSBarry Smith   }
2753d64ed03dSBarry Smith 
2754b24ad042SBarry Smith   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
275557b952d6SSatish Balay   M = header[1]; N = header[2];
275657b952d6SSatish Balay 
275729bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
275857b952d6SSatish Balay 
275957b952d6SSatish Balay   /*
276057b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
276157b952d6SSatish Balay      divisible by the blocksize
276257b952d6SSatish Balay   */
276357b952d6SSatish Balay   Mbs        = M/bs;
2764dc231df0SBarry Smith   extra_rows = bs - M + bs*Mbs;
276557b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
276657b952d6SSatish Balay   else                  Mbs++;
276757b952d6SSatish Balay   if (extra_rows && !rank) {
2768*ae15b995SBarry Smith     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
276957b952d6SSatish Balay   }
2770537820f0SBarry Smith 
277157b952d6SSatish Balay   /* determine ownership of all rows */
277257b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
277357b952d6SSatish Balay   m          = mbs*bs;
2774dc231df0SBarry Smith   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2775b24ad042SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2776167e7480SBarry Smith 
2777167e7480SBarry Smith   /* process 0 needs enough room for process with most rows */
2778167e7480SBarry Smith   if (!rank) {
2779167e7480SBarry Smith     mmax = rowners[1];
2780167e7480SBarry Smith     for (i=2; i<size; i++) {
2781167e7480SBarry Smith       mmax = PetscMax(mmax,rowners[i]);
2782167e7480SBarry Smith     }
2783ca02efcfSSatish Balay     mmax*=bs;
2784167e7480SBarry Smith   } else mmax = m;
2785167e7480SBarry Smith 
278657b952d6SSatish Balay   rowners[0] = 0;
2787cee3aa6bSSatish Balay   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2788cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
278957b952d6SSatish Balay   rstart = rowners[rank];
279057b952d6SSatish Balay   rend   = rowners[rank+1];
279157b952d6SSatish Balay 
279257b952d6SSatish Balay   /* distribute row lengths to all processors */
279319c38ff2SBarry Smith   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
279457b952d6SSatish Balay   if (!rank) {
2795dc231df0SBarry Smith     mend = m;
2796dc231df0SBarry Smith     if (size == 1) mend = mend - extra_rows;
2797dc231df0SBarry Smith     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2798dc231df0SBarry Smith     for (j=mend; j<m; j++) locrowlens[j] = 1;
2799dc231df0SBarry Smith     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2800b24ad042SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2801b24ad042SBarry Smith     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2802dc231df0SBarry Smith     for (j=0; j<m; j++) {
2803dc231df0SBarry Smith       procsnz[0] += locrowlens[j];
2804dc231df0SBarry Smith     }
2805dc231df0SBarry Smith     for (i=1; i<size; i++) {
2806dc231df0SBarry Smith       mend = browners[i+1] - browners[i];
2807dc231df0SBarry Smith       if (i == size-1) mend = mend - extra_rows;
2808dc231df0SBarry Smith       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2809dc231df0SBarry Smith       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2810dc231df0SBarry Smith       /* calculate the number of nonzeros on each processor */
2811dc231df0SBarry Smith       for (j=0; j<browners[i+1]-browners[i]; j++) {
281257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
281357b952d6SSatish Balay       }
2814dc231df0SBarry Smith       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281557b952d6SSatish Balay     }
2816606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2817dc231df0SBarry Smith   } else {
2818dc231df0SBarry Smith     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2819dc231df0SBarry Smith   }
282057b952d6SSatish Balay 
2821dc231df0SBarry Smith   if (!rank) {
282257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
28238a8e0b3aSBarry Smith     maxnz = procsnz[0];
2824cdc0ba36SBarry Smith     for (i=1; i<size; i++) {
282557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
282657b952d6SSatish Balay     }
2827b24ad042SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
282857b952d6SSatish Balay 
282957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
283057b952d6SSatish Balay     nz     = procsnz[0];
283119c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
283257b952d6SSatish Balay     mycols = ibuf;
2833cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2834e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2835cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2836cee3aa6bSSatish Balay 
283757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
283857b952d6SSatish Balay     for (i=1; i<size-1; i++) {
283957b952d6SSatish Balay       nz   = procsnz[i];
2840e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2841b24ad042SBarry Smith       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
284257b952d6SSatish Balay     }
284357b952d6SSatish Balay     /* read in the stuff for the last proc */
284457b952d6SSatish Balay     if (size != 1) {
284557b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2846e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
284757b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2848b24ad042SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
284957b952d6SSatish Balay     }
2850606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2851d64ed03dSBarry Smith   } else {
285257b952d6SSatish Balay     /* determine buffer space needed for message */
285357b952d6SSatish Balay     nz = 0;
285457b952d6SSatish Balay     for (i=0; i<m; i++) {
285557b952d6SSatish Balay       nz += locrowlens[i];
285657b952d6SSatish Balay     }
285719c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
285857b952d6SSatish Balay     mycols = ibuf;
285957b952d6SSatish Balay     /* receive message of column indices*/
2860b24ad042SBarry Smith     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2861b24ad042SBarry Smith     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
286229bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
286357b952d6SSatish Balay   }
286457b952d6SSatish Balay 
286557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2866dc231df0SBarry Smith   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2867dc231df0SBarry Smith   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2868dc231df0SBarry Smith   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2869dc231df0SBarry Smith   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2870dc231df0SBarry Smith   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
287157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
287257b952d6SSatish Balay   for (i=0; i<mbs; i++) {
287357b952d6SSatish Balay     dcount  = 0;
287457b952d6SSatish Balay     odcount = 0;
287557b952d6SSatish Balay     for (j=0; j<bs; j++) {
287657b952d6SSatish Balay       kmax = locrowlens[rowcount];
287757b952d6SSatish Balay       for (k=0; k<kmax; k++) {
287857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
287957b952d6SSatish Balay         if (!mask[tmp]) {
288057b952d6SSatish Balay           mask[tmp] = 1;
288157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
288257b952d6SSatish Balay           else masked1[dcount++] = tmp;
288357b952d6SSatish Balay         }
288457b952d6SSatish Balay       }
288557b952d6SSatish Balay       rowcount++;
288657b952d6SSatish Balay     }
2887cee3aa6bSSatish Balay 
288857b952d6SSatish Balay     dlens[i]  = dcount;
288957b952d6SSatish Balay     odlens[i] = odcount;
2890cee3aa6bSSatish Balay 
289157b952d6SSatish Balay     /* zero out the mask elements we set */
289257b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
289357b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
289457b952d6SSatish Balay   }
2895cee3aa6bSSatish Balay 
289657b952d6SSatish Balay   /* create our matrix */
2897f69a0ea3SMatthew Knepley   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2898f69a0ea3SMatthew Knepley   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
289978ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
290078ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
290178ae41b4SKris Buschelman 
290278ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2903dc231df0SBarry Smith   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
290457b952d6SSatish Balay 
290557b952d6SSatish Balay   if (!rank) {
290619c38ff2SBarry Smith     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
290757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
290857b952d6SSatish Balay     nz = procsnz[0];
290957b952d6SSatish Balay     vals = buf;
2910cee3aa6bSSatish Balay     mycols = ibuf;
2911cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2912e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2913cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2914537820f0SBarry Smith 
291557b952d6SSatish Balay     /* insert into matrix */
291657b952d6SSatish Balay     jj      = rstart*bs;
291757b952d6SSatish Balay     for (i=0; i<m; i++) {
2918dc231df0SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
291957b952d6SSatish Balay       mycols += locrowlens[i];
292057b952d6SSatish Balay       vals   += locrowlens[i];
292157b952d6SSatish Balay       jj++;
292257b952d6SSatish Balay     }
292357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
292457b952d6SSatish Balay     for (i=1; i<size-1; i++) {
292557b952d6SSatish Balay       nz   = procsnz[i];
292657b952d6SSatish Balay       vals = buf;
2927e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
292957b952d6SSatish Balay     }
293057b952d6SSatish Balay     /* the last proc */
293157b952d6SSatish Balay     if (size != 1){
293257b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2933cee3aa6bSSatish Balay       vals = buf;
2934e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
293557b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2936ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
293757b952d6SSatish Balay     }
2938606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2939d64ed03dSBarry Smith   } else {
294057b952d6SSatish Balay     /* receive numeric values */
294119c38ff2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
294257b952d6SSatish Balay 
294357b952d6SSatish Balay     /* receive message of values*/
294457b952d6SSatish Balay     vals   = buf;
2945cee3aa6bSSatish Balay     mycols = ibuf;
2946ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2947ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
294829bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
294957b952d6SSatish Balay 
295057b952d6SSatish Balay     /* insert into matrix */
295157b952d6SSatish Balay     jj      = rstart*bs;
2952cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2953dc231df0SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
295457b952d6SSatish Balay       mycols += locrowlens[i];
295557b952d6SSatish Balay       vals   += locrowlens[i];
295657b952d6SSatish Balay       jj++;
295757b952d6SSatish Balay     }
295857b952d6SSatish Balay   }
2959606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2960606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2961606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2962dc231df0SBarry Smith   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2963dc231df0SBarry Smith   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2964dc231df0SBarry Smith   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
29656d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29666d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296778ae41b4SKris Buschelman 
296878ae41b4SKris Buschelman   *newmat = A;
29693a40ed3dSBarry Smith   PetscFunctionReturn(0);
297057b952d6SSatish Balay }
297157b952d6SSatish Balay 
29724a2ae208SSatish Balay #undef __FUNCT__
29734a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2974133cdb44SSatish Balay /*@
2975133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2976133cdb44SSatish Balay 
2977133cdb44SSatish Balay    Input Parameters:
2978133cdb44SSatish Balay .  mat  - the matrix
2979133cdb44SSatish Balay .  fact - factor
2980133cdb44SSatish Balay 
2981fee21e36SBarry Smith    Collective on Mat
2982fee21e36SBarry Smith 
29838c890885SBarry Smith    Level: advanced
29848c890885SBarry Smith 
2985133cdb44SSatish Balay   Notes:
2986133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2987133cdb44SSatish Balay 
2988133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2989133cdb44SSatish Balay 
2990133cdb44SSatish Balay .seealso: MatSetOption()
2991133cdb44SSatish Balay @*/
2992be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2993133cdb44SSatish Balay {
2994dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscReal);
29955bf65638SKris Buschelman 
29965bf65638SKris Buschelman   PetscFunctionBegin;
29975bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
29985bf65638SKris Buschelman   if (f) {
29995bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
30005bf65638SKris Buschelman   }
30015bf65638SKris Buschelman   PetscFunctionReturn(0);
30025bf65638SKris Buschelman }
30035bf65638SKris Buschelman 
3004be1d678aSKris Buschelman EXTERN_C_BEGIN
30055bf65638SKris Buschelman #undef __FUNCT__
30065bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3007be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
30085bf65638SKris Buschelman {
300925fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3010133cdb44SSatish Balay 
3011133cdb44SSatish Balay   PetscFunctionBegin;
3012133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
3013133cdb44SSatish Balay   baij->ht_fact = fact;
3014133cdb44SSatish Balay   PetscFunctionReturn(0);
3015133cdb44SSatish Balay }
3016be1d678aSKris Buschelman EXTERN_C_END
3017f2a5309cSSatish Balay 
30184a2ae208SSatish Balay #undef __FUNCT__
30194a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3020be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3021f2a5309cSSatish Balay {
3022f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3023f2a5309cSSatish Balay   PetscFunctionBegin;
3024f2a5309cSSatish Balay   *Ad     = a->A;
3025f2a5309cSSatish Balay   *Ao     = a->B;
3026195d93cdSBarry Smith   *colmap = a->garray;
3027f2a5309cSSatish Balay   PetscFunctionReturn(0);
3028f2a5309cSSatish Balay }
3029