xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 577dd1f9a102020c144c6b00e57733021316631d)
179bdfe76SSatish Balay 
2e090d566SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
379bdfe76SSatish Balay 
4dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5dfbe8321SBarry Smith EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6b24ad042SBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7b24ad042SBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8b24ad042SBarry Smith EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11b24ad042SBarry Smith EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12b24ad042SBarry Smith EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13dfbe8321SBarry Smith EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
1593fea6afSBarry Smith 
1693fea6afSBarry Smith /*  UGLY, ugly, ugly
1787828ca2SBarry Smith    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
1893fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
1993fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2093fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2193fea6afSBarry Smith    into the single precision data structures.
2293fea6afSBarry Smith */
2393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
24b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
2993fea6afSBarry Smith #else
306fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3193fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3293fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
336fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
346fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3593fea6afSBarry Smith #endif
363b2fbd54SBarry Smith 
374a2ae208SSatish Balay #undef __FUNCT__
384a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39dfbe8321SBarry Smith PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
407843d17aSBarry Smith {
417843d17aSBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42dfbe8321SBarry Smith   PetscErrorCode ierr;
43b24ad042SBarry Smith   PetscInt       i;
4487828ca2SBarry Smith   PetscScalar    *va,*vb;
457843d17aSBarry Smith   Vec            vtmp;
467843d17aSBarry Smith 
477843d17aSBarry Smith   PetscFunctionBegin;
487843d17aSBarry Smith 
497843d17aSBarry Smith   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
501ebc52fbSHong Zhang   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
517843d17aSBarry Smith 
52ac355199SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
537843d17aSBarry Smith   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
541ebc52fbSHong Zhang   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
557843d17aSBarry Smith 
56273d9f13SBarry Smith   for (i=0; i<A->m; i++){
57273d9f13SBarry Smith     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
587843d17aSBarry Smith   }
597843d17aSBarry Smith 
601ebc52fbSHong Zhang   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
611ebc52fbSHong Zhang   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62ac355199SBarry Smith   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
637843d17aSBarry Smith 
647843d17aSBarry Smith   PetscFunctionReturn(0);
657843d17aSBarry Smith }
667843d17aSBarry Smith 
677fc3c18eSBarry Smith EXTERN_C_BEGIN
684a2ae208SSatish Balay #undef __FUNCT__
694a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70dfbe8321SBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
717fc3c18eSBarry Smith {
727fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73dfbe8321SBarry Smith   PetscErrorCode ierr;
747fc3c18eSBarry Smith 
757fc3c18eSBarry Smith   PetscFunctionBegin;
767fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
777fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
787fc3c18eSBarry Smith   PetscFunctionReturn(0);
797fc3c18eSBarry Smith }
807fc3c18eSBarry Smith EXTERN_C_END
817fc3c18eSBarry Smith 
827fc3c18eSBarry Smith EXTERN_C_BEGIN
834a2ae208SSatish Balay #undef __FUNCT__
844a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85dfbe8321SBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
867fc3c18eSBarry Smith {
877fc3c18eSBarry Smith   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88dfbe8321SBarry Smith   PetscErrorCode ierr;
897fc3c18eSBarry Smith 
907fc3c18eSBarry Smith   PetscFunctionBegin;
917fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
927fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
937fc3c18eSBarry Smith   PetscFunctionReturn(0);
947fc3c18eSBarry Smith }
957fc3c18eSBarry Smith EXTERN_C_END
967fc3c18eSBarry Smith 
97537820f0SBarry Smith /*
98537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
9957b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
10057b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
10157b952d6SSatish Balay    length of colmap equals the global matrix length.
10257b952d6SSatish Balay */
1034a2ae208SSatish Balay #undef __FUNCT__
1044a2ae208SSatish Balay #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105dfbe8321SBarry Smith PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
10657b952d6SSatish Balay {
10757b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
10857b952d6SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
1096849ba73SBarry Smith   PetscErrorCode ierr;
110521d7252SBarry Smith   PetscInt       nbs = B->nbs,i,bs=mat->bs;
11157b952d6SSatish Balay 
112d64ed03dSBarry Smith   PetscFunctionBegin;
113aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
114f14a1c24SBarry Smith   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
11548e59246SSatish Balay   for (i=0; i<nbs; i++){
1160f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
11748e59246SSatish Balay   }
11848e59246SSatish Balay #else
119b24ad042SBarry Smith   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
12052e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
121b24ad042SBarry Smith   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
12348e59246SSatish Balay #endif
1243a40ed3dSBarry Smith   PetscFunctionReturn(0);
12557b952d6SSatish Balay }
12657b952d6SSatish Balay 
12780c1aa95SSatish Balay #define CHUNKSIZE  10
12880c1aa95SSatish Balay 
129f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
13080c1aa95SSatish Balay { \
13180c1aa95SSatish Balay  \
13280c1aa95SSatish Balay     brow = row/bs;  \
13380c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
13580c1aa95SSatish Balay       bcol = col/bs; \
13680c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
137ab26458aSBarry Smith       low = 0; high = nrow; \
138ab26458aSBarry Smith       while (high-low > 3) { \
139ab26458aSBarry Smith         t = (low+high)/2; \
140ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
141ab26458aSBarry Smith         else              low  = t; \
142ab26458aSBarry Smith       } \
143ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
14480c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
14580c1aa95SSatish Balay         if (rp[_i] == bcol) { \
14680c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
148eada6651SSatish Balay           else                    *bap  = value;  \
149ac7a638eSSatish Balay           goto a_noinsert; \
15080c1aa95SSatish Balay         } \
15180c1aa95SSatish Balay       } \
15289280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
15377431f27SBarry Smith       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
15480c1aa95SSatish Balay       if (nrow >= rmax) { \
15580c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
156b24ad042SBarry Smith         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1573eda8832SBarry Smith         MatScalar *new_a; \
15880c1aa95SSatish Balay  \
15977431f27SBarry Smith         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
16089280ab3SLois Curfman McInnes  \
16180c1aa95SSatish Balay         /* malloc new storage space */ \
162b24ad042SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
16382502324SSatish Balay         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164b24ad042SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
16580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
16680c1aa95SSatish Balay  \
16780c1aa95SSatish Balay         /* copy over old data into new slots */ \
16880c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
16980c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170b24ad042SBarry Smith         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
17180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172b24ad042SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
1733eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
17487828ca2SBarry Smith         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1763eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
17780c1aa95SSatish Balay         /* free up old matrix storage */ \
178606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179606d414cSSatish Balay         if (!a->singlemalloc) { \
180606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182606d414cSSatish Balay         } \
18380c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1847c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
18580c1aa95SSatish Balay  \
18680c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
18852e6d16bSBarry Smith         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
18980c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
19080c1aa95SSatish Balay         a->reallocs++; \
19180c1aa95SSatish Balay         a->nz++; \
19280c1aa95SSatish Balay       } \
19380c1aa95SSatish Balay       N = nrow++ - 1;  \
19480c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
19580c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
19680c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1973eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
19880c1aa95SSatish Balay       } \
1993eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
20080c1aa95SSatish Balay       rp[_i]                      = bcol;  \
20180c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
202ac7a638eSSatish Balay       a_noinsert:; \
20380c1aa95SSatish Balay     ailen[brow] = nrow; \
20480c1aa95SSatish Balay }
20557b952d6SSatish Balay 
206ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207ac7a638eSSatish Balay { \
208ac7a638eSSatish Balay     brow = row/bs;  \
209ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
211ac7a638eSSatish Balay       bcol = col/bs; \
212ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
213ac7a638eSSatish Balay       low = 0; high = nrow; \
214ac7a638eSSatish Balay       while (high-low > 3) { \
215ac7a638eSSatish Balay         t = (low+high)/2; \
216ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
217ac7a638eSSatish Balay         else              low  = t; \
218ac7a638eSSatish Balay       } \
219ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
220ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
221ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
222ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
224ac7a638eSSatish Balay           else                    *bap  = value;  \
225ac7a638eSSatish Balay           goto b_noinsert; \
226ac7a638eSSatish Balay         } \
227ac7a638eSSatish Balay       } \
22889280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
22977431f27SBarry Smith       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
230ac7a638eSSatish Balay       if (nrow >= rmax) { \
231ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
232b24ad042SBarry Smith         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2333eda8832SBarry Smith         MatScalar *new_a; \
234ac7a638eSSatish Balay  \
23577431f27SBarry Smith         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
23689280ab3SLois Curfman McInnes  \
237ac7a638eSSatish Balay         /* malloc new storage space */ \
238b24ad042SBarry Smith         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
23982502324SSatish Balay         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240b24ad042SBarry Smith         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
241ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
242ac7a638eSSatish Balay  \
243ac7a638eSSatish Balay         /* copy over old data into new slots */ \
244ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
24574c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246b24ad042SBarry Smith         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
247ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248b24ad042SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
2493eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2503eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2523eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253ac7a638eSSatish Balay         /* free up old matrix storage */ \
254606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255606d414cSSatish Balay         if (!b->singlemalloc) { \
256606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258606d414cSSatish Balay         } \
25974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2607c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
261ac7a638eSSatish Balay  \
262ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
26452e6d16bSBarry Smith         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
26574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
26674c639caSSatish Balay         b->reallocs++; \
26774c639caSSatish Balay         b->nz++; \
268ac7a638eSSatish Balay       } \
269ac7a638eSSatish Balay       N = nrow++ - 1;  \
270ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
271ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
272ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2733eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274ac7a638eSSatish Balay       } \
2753eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
277ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
278ac7a638eSSatish Balay       b_noinsert:; \
279ac7a638eSSatish Balay     bilen[brow] = nrow; \
280ac7a638eSSatish Balay }
281ac7a638eSSatish Balay 
28293fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2834a2ae208SSatish Balay #undef __FUNCT__
2844a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ"
285b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
28657b952d6SSatish Balay {
2876fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288dfbe8321SBarry Smith   PetscErrorCode ierr;
289b24ad042SBarry Smith   PetscInt       i,N = m*n;
2906fa18ffdSBarry Smith   MatScalar      *vsingle;
29193fea6afSBarry Smith 
29293fea6afSBarry Smith   PetscFunctionBegin;
2936fa18ffdSBarry Smith   if (N > b->setvalueslen) {
2946fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
29582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
2966fa18ffdSBarry Smith     b->setvalueslen  = N;
2976fa18ffdSBarry Smith   }
2986fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
2996fa18ffdSBarry Smith 
30093fea6afSBarry Smith   for (i=0; i<N; i++) {
30193fea6afSBarry Smith     vsingle[i] = v[i];
30293fea6afSBarry Smith   }
30393fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
30493fea6afSBarry Smith   PetscFunctionReturn(0);
30593fea6afSBarry Smith }
30693fea6afSBarry Smith 
3074a2ae208SSatish Balay #undef __FUNCT__
3084a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
31093fea6afSBarry Smith {
3116fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312dfbe8321SBarry Smith   PetscErrorCode ierr;
313b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
3146fa18ffdSBarry Smith   MatScalar      *vsingle;
31593fea6afSBarry Smith 
31693fea6afSBarry Smith   PetscFunctionBegin;
3176fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3186fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
31982502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3206fa18ffdSBarry Smith     b->setvalueslen  = N;
3216fa18ffdSBarry Smith   }
3226fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
32393fea6afSBarry Smith   for (i=0; i<N; i++) {
32493fea6afSBarry Smith     vsingle[i] = v[i];
32593fea6afSBarry Smith   }
32693fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
32793fea6afSBarry Smith   PetscFunctionReturn(0);
32893fea6afSBarry Smith }
3296fa18ffdSBarry Smith 
3304a2ae208SSatish Balay #undef __FUNCT__
3314a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3336fa18ffdSBarry Smith {
3346fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335dfbe8321SBarry Smith   PetscErrorCode ierr;
336b24ad042SBarry Smith   PetscInt       i,N = m*n;
3376fa18ffdSBarry Smith   MatScalar      *vsingle;
3386fa18ffdSBarry Smith 
3396fa18ffdSBarry Smith   PetscFunctionBegin;
3406fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3416fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
34282502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3436fa18ffdSBarry Smith     b->setvalueslen  = N;
3446fa18ffdSBarry Smith   }
3456fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3466fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3476fa18ffdSBarry Smith     vsingle[i] = v[i];
3486fa18ffdSBarry Smith   }
3496fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3506fa18ffdSBarry Smith   PetscFunctionReturn(0);
3516fa18ffdSBarry Smith }
3526fa18ffdSBarry Smith 
3534a2ae208SSatish Balay #undef __FUNCT__
3544a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
3566fa18ffdSBarry Smith {
3576fa18ffdSBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358dfbe8321SBarry Smith   PetscErrorCode ierr;
359b24ad042SBarry Smith   PetscInt       i,N = m*n*b->bs2;
3606fa18ffdSBarry Smith   MatScalar      *vsingle;
3616fa18ffdSBarry Smith 
3626fa18ffdSBarry Smith   PetscFunctionBegin;
3636fa18ffdSBarry Smith   if (N > b->setvalueslen) {
3646fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
36582502324SSatish Balay     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
3666fa18ffdSBarry Smith     b->setvalueslen  = N;
3676fa18ffdSBarry Smith   }
3686fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
3696fa18ffdSBarry Smith   for (i=0; i<N; i++) {
3706fa18ffdSBarry Smith     vsingle[i] = v[i];
3716fa18ffdSBarry Smith   }
3726fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
3736fa18ffdSBarry Smith   PetscFunctionReturn(0);
3746fa18ffdSBarry Smith }
37593fea6afSBarry Smith #endif
37693fea6afSBarry Smith 
3774a2ae208SSatish Balay #undef __FUNCT__
378e03e44c9SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
38093fea6afSBarry Smith {
38157b952d6SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
38293fea6afSBarry Smith   MatScalar      value;
383273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
384dfbe8321SBarry Smith   PetscErrorCode ierr;
385b24ad042SBarry Smith   PetscInt       i,j,row,col;
386b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
387b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388521d7252SBarry Smith   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
38957b952d6SSatish Balay 
390eada6651SSatish Balay   /* Some Variables required in the macro */
39180c1aa95SSatish Balay   Mat            A = baij->A;
39280c1aa95SSatish Balay   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393b24ad042SBarry Smith   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3943eda8832SBarry Smith   MatScalar      *aa=a->a;
395ac7a638eSSatish Balay 
396ac7a638eSSatish Balay   Mat            B = baij->B;
397ac7a638eSSatish Balay   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398b24ad042SBarry Smith   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3993eda8832SBarry Smith   MatScalar      *ba=b->a;
400ac7a638eSSatish Balay 
401b24ad042SBarry Smith   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402b24ad042SBarry Smith   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
4033eda8832SBarry Smith   MatScalar      *ap,*bap;
40480c1aa95SSatish Balay 
405d64ed03dSBarry Smith   PetscFunctionBegin;
40657b952d6SSatish Balay   for (i=0; i<m; i++) {
4075ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
40977431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410639f9d9dSBarry Smith #endif
41157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
41257b952d6SSatish Balay       row = im[i] - rstart_orig;
41357b952d6SSatish Balay       for (j=0; j<n; j++) {
41457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
41557b952d6SSatish Balay           col = in[j] - cstart_orig;
41657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
41880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41973959e64SBarry Smith         } else if (in[j] < 0) continue;
4202515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
42177431f27SBarry Smith         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
422639f9d9dSBarry Smith #endif
42357b952d6SSatish Balay         else {
42457b952d6SSatish Balay           if (mat->was_assembled) {
425905e6a2fSBarry Smith             if (!baij->colmap) {
426905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427905e6a2fSBarry Smith             }
428aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
4290f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430bba1ac68SSatish Balay             col  = col - 1;
43148e59246SSatish Balay #else
432bba1ac68SSatish Balay             col = baij->colmap[in[j]/bs] - 1;
43348e59246SSatish Balay #endif
43457b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43557b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4368295de27SSatish Balay               col =  in[j];
4379bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4389bf004c3SSatish Balay               B = baij->B;
4399bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4409bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4419bf004c3SSatish Balay               ba=b->a;
442bba1ac68SSatish Balay             } else col += in[j]%bs;
4438295de27SSatish Balay           } else col = in[j];
44457b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
44590da58bdSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
44690da58bdSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
44757b952d6SSatish Balay         }
44857b952d6SSatish Balay       }
449d64ed03dSBarry Smith     } else {
45090f02eecSBarry Smith       if (!baij->donotstash) {
451ff2fd236SBarry Smith         if (roworiented) {
4526fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453ff2fd236SBarry Smith         } else {
4546fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
45557b952d6SSatish Balay         }
45657b952d6SSatish Balay       }
45757b952d6SSatish Balay     }
45890f02eecSBarry Smith   }
4593a40ed3dSBarry Smith   PetscFunctionReturn(0);
46057b952d6SSatish Balay }
46157b952d6SSatish Balay 
4624a2ae208SSatish Balay #undef __FUNCT__
4634a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
464b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
465ab26458aSBarry Smith {
466ab26458aSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467f15d580aSBarry Smith   const MatScalar *value;
468f15d580aSBarry Smith   MatScalar       *barray=baij->barray;
469273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
470dfbe8321SBarry Smith   PetscErrorCode  ierr;
471b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
472b24ad042SBarry Smith   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
473521d7252SBarry Smith   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
474ab26458aSBarry Smith 
475b16ae2b1SBarry Smith   PetscFunctionBegin;
47630793edcSSatish Balay   if(!barray) {
47782502324SSatish Balay     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
47882502324SSatish Balay     baij->barray = barray;
47930793edcSSatish Balay   }
48030793edcSSatish Balay 
481ab26458aSBarry Smith   if (roworiented) {
482ab26458aSBarry Smith     stepval = (n-1)*bs;
483ab26458aSBarry Smith   } else {
484ab26458aSBarry Smith     stepval = (m-1)*bs;
485ab26458aSBarry Smith   }
486ab26458aSBarry Smith   for (i=0; i<m; i++) {
4875ef9f2a5SBarry Smith     if (im[i] < 0) continue;
4882515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
48977431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
490ab26458aSBarry Smith #endif
491ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
492ab26458aSBarry Smith       row = im[i] - rstart;
493ab26458aSBarry Smith       for (j=0; j<n; j++) {
49415b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
49515b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
496f15d580aSBarry Smith           barray = (MatScalar*)v + i*bs2;
49715b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
498f15d580aSBarry Smith           barray = (MatScalar*)v + j*bs2;
49915b57d14SSatish Balay         } else { /* Here a copy is required */
500ab26458aSBarry Smith           if (roworiented) {
501ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
502ab26458aSBarry Smith           } else {
503ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
504abef11f7SSatish Balay           }
50547513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
50647513183SBarry Smith             for (jj=0; jj<bs; jj++) {
50730793edcSSatish Balay               *barray++  = *value++;
50847513183SBarry Smith             }
50947513183SBarry Smith           }
51030793edcSSatish Balay           barray -=bs2;
51115b57d14SSatish Balay         }
512abef11f7SSatish Balay 
513abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
514abef11f7SSatish Balay           col  = in[j] - cstart;
51593fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516ab26458aSBarry Smith         }
5175ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
5182515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
51977431f27SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
520ab26458aSBarry Smith #endif
521ab26458aSBarry Smith         else {
522ab26458aSBarry Smith           if (mat->was_assembled) {
523ab26458aSBarry Smith             if (!baij->colmap) {
524ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525ab26458aSBarry Smith             }
526a5eb4965SSatish Balay 
5272515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
528aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
529b24ad042SBarry Smith             { PetscInt data;
5300f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
53129bbc08cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532fa46199cSSatish Balay             }
53348e59246SSatish Balay #else
53429bbc08cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535a5eb4965SSatish Balay #endif
53648e59246SSatish Balay #endif
537aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5380f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539fa46199cSSatish Balay             col  = (col - 1)/bs;
54048e59246SSatish Balay #else
541a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
54248e59246SSatish Balay #endif
543ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545ab26458aSBarry Smith               col =  in[j];
546ab26458aSBarry Smith             }
547ab26458aSBarry Smith           }
548ab26458aSBarry Smith           else col = in[j];
54993fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550ab26458aSBarry Smith         }
551ab26458aSBarry Smith       }
552d64ed03dSBarry Smith     } else {
553ab26458aSBarry Smith       if (!baij->donotstash) {
554ff2fd236SBarry Smith         if (roworiented) {
5556fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556ff2fd236SBarry Smith         } else {
5576fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558ff2fd236SBarry Smith         }
559abef11f7SSatish Balay       }
560ab26458aSBarry Smith     }
561ab26458aSBarry Smith   }
5623a40ed3dSBarry Smith   PetscFunctionReturn(0);
563ab26458aSBarry Smith }
5646fa18ffdSBarry Smith 
5650bdbc534SSatish Balay #define HASH_KEY 0.6180339887
566b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
5694a2ae208SSatish Balay #undef __FUNCT__
5704a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
5720bdbc534SSatish Balay {
5730bdbc534SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574273d9f13SBarry Smith   PetscTruth     roworiented = baij->roworiented;
575dfbe8321SBarry Smith   PetscErrorCode ierr;
576b24ad042SBarry Smith   PetscInt       i,j,row,col;
577b24ad042SBarry Smith   PetscInt       rstart_orig=baij->rstart_bs;
578b24ad042SBarry Smith   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579521d7252SBarry Smith   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
580329f5518SBarry Smith   PetscReal      tmp;
5813eda8832SBarry Smith   MatScalar      **HD = baij->hd,value;
5822515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
583b24ad042SBarry Smith   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5844a15367fSSatish Balay #endif
5850bdbc534SSatish Balay 
5860bdbc534SSatish Balay   PetscFunctionBegin;
5870bdbc534SSatish Balay 
5880bdbc534SSatish Balay   for (i=0; i<m; i++) {
5892515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
59029bbc08cSBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
59177431f27SBarry Smith     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
5920bdbc534SSatish Balay #endif
5930bdbc534SSatish Balay       row = im[i];
594c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5950bdbc534SSatish Balay       for (j=0; j<n; j++) {
5960bdbc534SSatish Balay         col = in[j];
5976fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
598b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
599c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
600c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6010bdbc534SSatish Balay 
602c2760754SSatish Balay 
603c2760754SSatish Balay         idx = h1;
6042515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
605187ce0cbSSatish Balay         insert_ct++;
606187ce0cbSSatish Balay         total_ct++;
607187ce0cbSSatish Balay         if (HT[idx] != key) {
608187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609187ce0cbSSatish Balay           if (idx == size) {
610187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611187ce0cbSSatish Balay             if (idx == h1) {
61277431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613187ce0cbSSatish Balay             }
614187ce0cbSSatish Balay           }
615187ce0cbSSatish Balay         }
616187ce0cbSSatish Balay #else
617c2760754SSatish Balay         if (HT[idx] != key) {
618c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619c2760754SSatish Balay           if (idx == size) {
620c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621c2760754SSatish Balay             if (idx == h1) {
62277431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623c2760754SSatish Balay             }
624c2760754SSatish Balay           }
625c2760754SSatish Balay         }
626187ce0cbSSatish Balay #endif
627c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
628c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
6300bdbc534SSatish Balay       }
6310bdbc534SSatish Balay     } else {
6320bdbc534SSatish Balay       if (!baij->donotstash) {
633ff2fd236SBarry Smith         if (roworiented) {
6348798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635ff2fd236SBarry Smith         } else {
6368798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
6370bdbc534SSatish Balay         }
6380bdbc534SSatish Balay       }
6390bdbc534SSatish Balay     }
6400bdbc534SSatish Balay   }
6412515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
642187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
643187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
644187ce0cbSSatish Balay #endif
6450bdbc534SSatish Balay   PetscFunctionReturn(0);
6460bdbc534SSatish Balay }
6470bdbc534SSatish Balay 
6484a2ae208SSatish Balay #undef __FUNCT__
6494a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
650b24ad042SBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
6510bdbc534SSatish Balay {
6520bdbc534SSatish Balay   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653273d9f13SBarry Smith   PetscTruth      roworiented = baij->roworiented;
654dfbe8321SBarry Smith   PetscErrorCode  ierr;
655b24ad042SBarry Smith   PetscInt        i,j,ii,jj,row,col;
656b24ad042SBarry Smith   PetscInt        rstart=baij->rstart ;
657521d7252SBarry Smith   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
658b24ad042SBarry Smith   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659329f5518SBarry Smith   PetscReal       tmp;
6603eda8832SBarry Smith   MatScalar       **HD = baij->hd,*baij_a;
661f15d580aSBarry Smith   const MatScalar *v_t,*value;
6622515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
663b24ad042SBarry Smith   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6644a15367fSSatish Balay #endif
6650bdbc534SSatish Balay 
666d0a41580SSatish Balay   PetscFunctionBegin;
667d0a41580SSatish Balay 
6680bdbc534SSatish Balay   if (roworiented) {
6690bdbc534SSatish Balay     stepval = (n-1)*bs;
6700bdbc534SSatish Balay   } else {
6710bdbc534SSatish Balay     stepval = (m-1)*bs;
6720bdbc534SSatish Balay   }
6730bdbc534SSatish Balay   for (i=0; i<m; i++) {
6742515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
67577431f27SBarry Smith     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
67677431f27SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
6770bdbc534SSatish Balay #endif
6780bdbc534SSatish Balay     row   = im[i];
679187ce0cbSSatish Balay     v_t   = v + i*bs2;
680c2760754SSatish Balay     if (row >= rstart && row < rend) {
6810bdbc534SSatish Balay       for (j=0; j<n; j++) {
6820bdbc534SSatish Balay         col = in[j];
6830bdbc534SSatish Balay 
6840bdbc534SSatish Balay         /* Look up into the Hash Table */
685c2760754SSatish Balay         key = row*Nbs+col+1;
686c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6870bdbc534SSatish Balay 
688c2760754SSatish Balay         idx = h1;
6892515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
690187ce0cbSSatish Balay         total_ct++;
691187ce0cbSSatish Balay         insert_ct++;
692187ce0cbSSatish Balay        if (HT[idx] != key) {
693187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694187ce0cbSSatish Balay           if (idx == size) {
695187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696187ce0cbSSatish Balay             if (idx == h1) {
69777431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
698187ce0cbSSatish Balay             }
699187ce0cbSSatish Balay           }
700187ce0cbSSatish Balay         }
701187ce0cbSSatish Balay #else
702c2760754SSatish Balay         if (HT[idx] != key) {
703c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704c2760754SSatish Balay           if (idx == size) {
705c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706c2760754SSatish Balay             if (idx == h1) {
70777431f27SBarry Smith               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
708c2760754SSatish Balay             }
709c2760754SSatish Balay           }
710c2760754SSatish Balay         }
711187ce0cbSSatish Balay #endif
712c2760754SSatish Balay         baij_a = HD[idx];
7130bdbc534SSatish Balay         if (roworiented) {
714c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
716187ce0cbSSatish Balay           value = v_t;
717187ce0cbSSatish Balay           v_t  += bs;
718fef45726SSatish Balay           if (addv == ADD_VALUES) {
719c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
720c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
721fef45726SSatish Balay                 baij_a[jj]  += *value++;
722b4cc0f5aSSatish Balay               }
723b4cc0f5aSSatish Balay             }
724fef45726SSatish Balay           } else {
725c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
726c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
727fef45726SSatish Balay                 baij_a[jj]  = *value++;
728fef45726SSatish Balay               }
729fef45726SSatish Balay             }
730fef45726SSatish Balay           }
7310bdbc534SSatish Balay         } else {
7320bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
733fef45726SSatish Balay           if (addv == ADD_VALUES) {
734b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
7350bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
736fef45726SSatish Balay                 baij_a[jj]  += *value++;
737fef45726SSatish Balay               }
738fef45726SSatish Balay             }
739fef45726SSatish Balay           } else {
740fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
742fef45726SSatish Balay                 baij_a[jj]  = *value++;
743fef45726SSatish Balay               }
744b4cc0f5aSSatish Balay             }
7450bdbc534SSatish Balay           }
7460bdbc534SSatish Balay         }
7470bdbc534SSatish Balay       }
7480bdbc534SSatish Balay     } else {
7490bdbc534SSatish Balay       if (!baij->donotstash) {
7500bdbc534SSatish Balay         if (roworiented) {
7518798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7520bdbc534SSatish Balay         } else {
7538798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7540bdbc534SSatish Balay         }
7550bdbc534SSatish Balay       }
7560bdbc534SSatish Balay     }
7570bdbc534SSatish Balay   }
7582515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
759187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
760187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
761187ce0cbSSatish Balay #endif
7620bdbc534SSatish Balay   PetscFunctionReturn(0);
7630bdbc534SSatish Balay }
764133cdb44SSatish Balay 
7654a2ae208SSatish Balay #undef __FUNCT__
7664a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ"
767b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
768d6de1c52SSatish Balay {
769d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
7706849ba73SBarry Smith   PetscErrorCode ierr;
771521d7252SBarry Smith   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
772b24ad042SBarry Smith   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
773d6de1c52SSatish Balay 
774133cdb44SSatish Balay   PetscFunctionBegin;
775d6de1c52SSatish Balay   for (i=0; i<m; i++) {
77677431f27SBarry Smith     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
77777431f27SBarry Smith     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
778d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
779d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
780d6de1c52SSatish Balay       for (j=0; j<n; j++) {
78177431f27SBarry Smith         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
78277431f27SBarry Smith         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
783d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
784d6de1c52SSatish Balay           col = idxn[j] - bscstart;
78598dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786d64ed03dSBarry Smith         } else {
787905e6a2fSBarry Smith           if (!baij->colmap) {
788905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
789905e6a2fSBarry Smith           }
790aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7910f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
792fa46199cSSatish Balay           data --;
79348e59246SSatish Balay #else
79448e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
79548e59246SSatish Balay #endif
79648e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
797d9d09a02SSatish Balay           else {
79848e59246SSatish Balay             col  = data + idxn[j]%bs;
79998dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
800d6de1c52SSatish Balay           }
801d6de1c52SSatish Balay         }
802d6de1c52SSatish Balay       }
803d64ed03dSBarry Smith     } else {
80429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
805d6de1c52SSatish Balay     }
806d6de1c52SSatish Balay   }
8073a40ed3dSBarry Smith  PetscFunctionReturn(0);
808d6de1c52SSatish Balay }
809d6de1c52SSatish Balay 
8104a2ae208SSatish Balay #undef __FUNCT__
8114a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ"
812dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813d6de1c52SSatish Balay {
814d6de1c52SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815d6de1c52SSatish Balay   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816dfbe8321SBarry Smith   PetscErrorCode ierr;
8178a62d963SHong Zhang   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
818329f5518SBarry Smith   PetscReal      sum = 0.0;
8193eda8832SBarry Smith   MatScalar      *v;
820d6de1c52SSatish Balay 
821d64ed03dSBarry Smith   PetscFunctionBegin;
822d6de1c52SSatish Balay   if (baij->size == 1) {
823064f8208SBarry Smith     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824d6de1c52SSatish Balay   } else {
825d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
826d6de1c52SSatish Balay       v = amat->a;
8278a62d963SHong Zhang       nz = amat->nz*bs2;
8288a62d963SHong Zhang       for (i=0; i<nz; i++) {
829aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
830329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
831d6de1c52SSatish Balay #else
832d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
833d6de1c52SSatish Balay #endif
834d6de1c52SSatish Balay       }
835d6de1c52SSatish Balay       v = bmat->a;
8368a62d963SHong Zhang       nz = bmat->nz*bs2;
8378a62d963SHong Zhang       for (i=0; i<nz; i++) {
838aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
839329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
840d6de1c52SSatish Balay #else
841d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
842d6de1c52SSatish Balay #endif
843d6de1c52SSatish Balay       }
844064f8208SBarry Smith       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
845064f8208SBarry Smith       *nrm = sqrt(*nrm);
8468a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
8478a62d963SHong Zhang       PetscReal *tmp,*tmp2;
8488a62d963SHong Zhang       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
8498a62d963SHong Zhang       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
8508a62d963SHong Zhang       tmp2 = tmp + mat->N;
8518a62d963SHong Zhang       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
8528a62d963SHong Zhang       v = amat->a; jj = amat->j;
8538a62d963SHong Zhang       for (i=0; i<amat->nz; i++) {
8548a62d963SHong Zhang         for (j=0; j<bs; j++){
8558a62d963SHong Zhang           col = bs*(cstart + *jj) + j; /* column index */
8568a62d963SHong Zhang           for (row=0; row<bs; row++){
8578a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v);  v++;
8588a62d963SHong Zhang           }
8598a62d963SHong Zhang         }
8608a62d963SHong Zhang         jj++;
8618a62d963SHong Zhang       }
8628a62d963SHong Zhang       v = bmat->a; jj = bmat->j;
8638a62d963SHong Zhang       for (i=0; i<bmat->nz; i++) {
8648a62d963SHong Zhang         for (j=0; j<bs; j++){
8658a62d963SHong Zhang           col = bs*garray[*jj] + j;
8668a62d963SHong Zhang           for (row=0; row<bs; row++){
8678a62d963SHong Zhang             tmp[col] += PetscAbsScalar(*v); v++;
8688a62d963SHong Zhang           }
8698a62d963SHong Zhang         }
8708a62d963SHong Zhang         jj++;
8718a62d963SHong Zhang       }
8728a62d963SHong Zhang       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
8738a62d963SHong Zhang       *nrm = 0.0;
8748a62d963SHong Zhang       for (j=0; j<mat->N; j++) {
8758a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
8768a62d963SHong Zhang       }
8778a62d963SHong Zhang       ierr = PetscFree(tmp);CHKERRQ(ierr);
8788a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
879*577dd1f9SKris Buschelman       PetscReal *sums;
880*577dd1f9SKris Buschelman       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
8818a62d963SHong Zhang       sum = 0.0;
8828a62d963SHong Zhang       for (j=0; j<amat->mbs; j++) {
8838a62d963SHong Zhang         for (row=0; row<bs; row++) sums[row] = 0.0;
8848a62d963SHong Zhang         v = amat->a + bs2*amat->i[j];
8858a62d963SHong Zhang         nz = amat->i[j+1]-amat->i[j];
8868a62d963SHong Zhang         for (i=0; i<nz; i++) {
8878a62d963SHong Zhang           for (col=0; col<bs; col++){
8888a62d963SHong Zhang             for (row=0; row<bs; row++){
8898a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8908a62d963SHong Zhang             }
8918a62d963SHong Zhang           }
8928a62d963SHong Zhang         }
8938a62d963SHong Zhang         v = bmat->a + bs2*bmat->i[j];
8948a62d963SHong Zhang         nz = bmat->i[j+1]-bmat->i[j];
8958a62d963SHong Zhang         for (i=0; i<nz; i++) {
8968a62d963SHong Zhang           for (col=0; col<bs; col++){
8978a62d963SHong Zhang             for (row=0; row<bs; row++){
8988a62d963SHong Zhang               sums[row] += PetscAbsScalar(*v); v++;
8998a62d963SHong Zhang             }
9008a62d963SHong Zhang           }
9018a62d963SHong Zhang         }
9028a62d963SHong Zhang         for (row=0; row<bs; row++){
9038a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
9048a62d963SHong Zhang         }
9058a62d963SHong Zhang       }
9068a62d963SHong Zhang       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
907*577dd1f9SKris Buschelman       ierr = PetscFree(sums);CHKERRQ(ierr);
908d64ed03dSBarry Smith     } else {
90929bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
910d6de1c52SSatish Balay     }
911d64ed03dSBarry Smith   }
9123a40ed3dSBarry Smith   PetscFunctionReturn(0);
913d6de1c52SSatish Balay }
91457b952d6SSatish Balay 
915fef45726SSatish Balay /*
916fef45726SSatish Balay   Creates the hash table, and sets the table
917fef45726SSatish Balay   This table is created only once.
918fef45726SSatish Balay   If new entried need to be added to the matrix
919fef45726SSatish Balay   then the hash table has to be destroyed and
920fef45726SSatish Balay   recreated.
921fef45726SSatish Balay */
9224a2ae208SSatish Balay #undef __FUNCT__
9234a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
924dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
925596b8d2eSBarry Smith {
926596b8d2eSBarry Smith   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
927596b8d2eSBarry Smith   Mat            A = baij->A,B=baij->B;
928596b8d2eSBarry Smith   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
929b24ad042SBarry Smith   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
9306849ba73SBarry Smith   PetscErrorCode ierr;
931b24ad042SBarry Smith   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
932b24ad042SBarry Smith   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
933b24ad042SBarry Smith   PetscInt       *HT,key;
9343eda8832SBarry Smith   MatScalar      **HD;
935329f5518SBarry Smith   PetscReal      tmp;
9362515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
937b24ad042SBarry Smith   PetscInt       ct=0,max=0;
9384a15367fSSatish Balay #endif
939fef45726SSatish Balay 
940d64ed03dSBarry Smith   PetscFunctionBegin;
941b24ad042SBarry Smith   baij->ht_size=(PetscInt)(factor*nz);
9420bdbc534SSatish Balay   size = baij->ht_size;
943fef45726SSatish Balay 
9440bdbc534SSatish Balay   if (baij->ht) {
9450bdbc534SSatish Balay     PetscFunctionReturn(0);
946596b8d2eSBarry Smith   }
9470bdbc534SSatish Balay 
948fef45726SSatish Balay   /* Allocate Memory for Hash Table */
949b24ad042SBarry Smith   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
950b24ad042SBarry Smith   baij->ht = (PetscInt*)(baij->hd + size);
951b9e4cc15SSatish Balay   HD       = baij->hd;
952a07cd24cSSatish Balay   HT       = baij->ht;
953b9e4cc15SSatish Balay 
954b9e4cc15SSatish Balay 
955b24ad042SBarry Smith   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
9560bdbc534SSatish Balay 
957596b8d2eSBarry Smith 
958596b8d2eSBarry Smith   /* Loop Over A */
9590bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
960596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
9610bdbc534SSatish Balay       row = i+rstart;
9620bdbc534SSatish Balay       col = aj[j]+cstart;
963596b8d2eSBarry Smith 
964187ce0cbSSatish Balay       key = row*Nbs + col + 1;
965c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9660bdbc534SSatish Balay       for (k=0; k<size; k++){
967958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9680bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9690bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
970596b8d2eSBarry Smith           break;
9712515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
972187ce0cbSSatish Balay         } else {
973187ce0cbSSatish Balay           ct++;
974187ce0cbSSatish Balay #endif
975596b8d2eSBarry Smith         }
976187ce0cbSSatish Balay       }
9772515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
978187ce0cbSSatish Balay       if (k> max) max = k;
979187ce0cbSSatish Balay #endif
980596b8d2eSBarry Smith     }
981596b8d2eSBarry Smith   }
982596b8d2eSBarry Smith   /* Loop Over B */
9830bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
984596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
9850bdbc534SSatish Balay       row = i+rstart;
9860bdbc534SSatish Balay       col = garray[bj[j]];
987187ce0cbSSatish Balay       key = row*Nbs + col + 1;
988c2760754SSatish Balay       h1  = HASH(size,key,tmp);
9890bdbc534SSatish Balay       for (k=0; k<size; k++){
990958c9bccSBarry Smith         if (!HT[(h1+k)%size]) {
9910bdbc534SSatish Balay           HT[(h1+k)%size] = key;
9920bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
993596b8d2eSBarry Smith           break;
9942515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
995187ce0cbSSatish Balay         } else {
996187ce0cbSSatish Balay           ct++;
997187ce0cbSSatish Balay #endif
998596b8d2eSBarry Smith         }
999187ce0cbSSatish Balay       }
10002515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1001187ce0cbSSatish Balay       if (k> max) max = k;
1002187ce0cbSSatish Balay #endif
1003596b8d2eSBarry Smith     }
1004596b8d2eSBarry Smith   }
1005596b8d2eSBarry Smith 
1006596b8d2eSBarry Smith   /* Print Summary */
10072515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1008c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
1009596b8d2eSBarry Smith     if (HT[i]) {j++;}
1010c38d4ed2SBarry Smith   }
101163ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
1012187ce0cbSSatish Balay #endif
10133a40ed3dSBarry Smith   PetscFunctionReturn(0);
1014596b8d2eSBarry Smith }
101557b952d6SSatish Balay 
10164a2ae208SSatish Balay #undef __FUNCT__
10174a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
1018dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1019bbb85fb3SSatish Balay {
1020bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1021dfbe8321SBarry Smith   PetscErrorCode ierr;
1022b24ad042SBarry Smith   PetscInt       nstash,reallocs;
1023bbb85fb3SSatish Balay   InsertMode     addv;
1024bbb85fb3SSatish Balay 
1025bbb85fb3SSatish Balay   PetscFunctionBegin;
1026bbb85fb3SSatish Balay   if (baij->donotstash) {
1027bbb85fb3SSatish Balay     PetscFunctionReturn(0);
1028bbb85fb3SSatish Balay   }
1029bbb85fb3SSatish Balay 
1030bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
1031bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1032bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
103329bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1034bbb85fb3SSatish Balay   }
1035bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
1036bbb85fb3SSatish Balay 
10378798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
10388798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
10398798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
104063ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
104146680499SSatish Balay   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
104263ba0a88SBarry Smith   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1043bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1044bbb85fb3SSatish Balay }
1045bbb85fb3SSatish Balay 
10464a2ae208SSatish Balay #undef __FUNCT__
10474a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
1048dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1049bbb85fb3SSatish Balay {
1050bbb85fb3SSatish Balay   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
1051a2d1c673SSatish Balay   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
10526849ba73SBarry Smith   PetscErrorCode ierr;
1053b24ad042SBarry Smith   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
1054b24ad042SBarry Smith   PetscInt       *row,*col,other_disassembled;
10557c922b88SBarry Smith   PetscTruth     r1,r2,r3;
10563eda8832SBarry Smith   MatScalar      *val;
1057bbb85fb3SSatish Balay   InsertMode     addv = mat->insertmode;
1058b24ad042SBarry Smith   PetscMPIInt    n;
1059bbb85fb3SSatish Balay 
1060bbb85fb3SSatish Balay   PetscFunctionBegin;
1061bbb85fb3SSatish Balay   if (!baij->donotstash) {
1062a2d1c673SSatish Balay     while (1) {
10638798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1064a2d1c673SSatish Balay       if (!flg) break;
1065a2d1c673SSatish Balay 
1066bbb85fb3SSatish Balay       for (i=0; i<n;) {
1067bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1068bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1069bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
1070bbb85fb3SSatish Balay         else       ncols = n-i;
1071bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
107293fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1073bbb85fb3SSatish Balay         i = j;
1074bbb85fb3SSatish Balay       }
1075bbb85fb3SSatish Balay     }
10768798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1077a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1078a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1079a2d1c673SSatish Balay        restore the original flags */
1080a2d1c673SSatish Balay     r1 = baij->roworiented;
1081a2d1c673SSatish Balay     r2 = a->roworiented;
1082a2d1c673SSatish Balay     r3 = b->roworiented;
10837c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
10847c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
10857c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
1086a2d1c673SSatish Balay     while (1) {
10878798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1088a2d1c673SSatish Balay       if (!flg) break;
1089a2d1c673SSatish Balay 
1090a2d1c673SSatish Balay       for (i=0; i<n;) {
1091a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1092a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1093a2d1c673SSatish Balay         if (j < n) ncols = j-i;
1094a2d1c673SSatish Balay         else       ncols = n-i;
109593fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1096a2d1c673SSatish Balay         i = j;
1097a2d1c673SSatish Balay       }
1098a2d1c673SSatish Balay     }
10998798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1100a2d1c673SSatish Balay     baij->roworiented = r1;
1101a2d1c673SSatish Balay     a->roworiented    = r2;
1102a2d1c673SSatish Balay     b->roworiented    = r3;
1103bbb85fb3SSatish Balay   }
1104bbb85fb3SSatish Balay 
1105bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1106bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1107bbb85fb3SSatish Balay 
1108bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1109bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1110bbb85fb3SSatish Balay   /*
1111bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1112bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1113bbb85fb3SSatish Balay   */
1114bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1115bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1116bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1117bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1118bbb85fb3SSatish Balay     }
1119bbb85fb3SSatish Balay   }
1120bbb85fb3SSatish Balay 
1121bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1122bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1123bbb85fb3SSatish Balay   }
112473e7a558SHong Zhang   b->compressedrow.use = PETSC_TRUE;
1125bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1126bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1127bbb85fb3SSatish Balay 
11282515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
1129bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
113063ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1131bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1132bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1133bbb85fb3SSatish Balay   }
1134bbb85fb3SSatish Balay #endif
1135bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1136bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1137bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1138bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1139bbb85fb3SSatish Balay   }
1140bbb85fb3SSatish Balay 
1141606d414cSSatish Balay   if (baij->rowvalues) {
1142606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1143606d414cSSatish Balay     baij->rowvalues = 0;
1144606d414cSSatish Balay   }
1145bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1146bbb85fb3SSatish Balay }
114757b952d6SSatish Balay 
11484a2ae208SSatish Balay #undef __FUNCT__
11494a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
11506849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
115157b952d6SSatish Balay {
115257b952d6SSatish Balay   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1153dfbe8321SBarry Smith   PetscErrorCode    ierr;
1154b24ad042SBarry Smith   PetscMPIInt       size = baij->size,rank = baij->rank;
1155521d7252SBarry Smith   PetscInt          bs = mat->bs;
115632077d6dSBarry Smith   PetscTruth        iascii,isdraw;
1157b0a32e0cSBarry Smith   PetscViewer       sviewer;
1158f3ef73ceSBarry Smith   PetscViewerFormat format;
115957b952d6SSatish Balay 
1160d64ed03dSBarry Smith   PetscFunctionBegin;
1161f81bd7ccSHong Zhang   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
116232077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1163fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
116432077d6dSBarry Smith   if (iascii) {
1165b0a32e0cSBarry Smith     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1166456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
11674e220ebcSLois Curfman McInnes       MatInfo info;
1168d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1169d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
117077431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
117177431f27SBarry Smith               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1172521d7252SBarry Smith               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1173d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
117477431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1175d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
117677431f27SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1177b0a32e0cSBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
117857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
11793a40ed3dSBarry Smith       PetscFunctionReturn(0);
1180fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
118177431f27SBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
11823a40ed3dSBarry Smith       PetscFunctionReturn(0);
118304929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
118404929863SHong Zhang       PetscFunctionReturn(0);
118557b952d6SSatish Balay     }
118657b952d6SSatish Balay   }
118757b952d6SSatish Balay 
11880f5bd95cSBarry Smith   if (isdraw) {
1189b0a32e0cSBarry Smith     PetscDraw       draw;
119057b952d6SSatish Balay     PetscTruth isnull;
1191b0a32e0cSBarry Smith     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1192b0a32e0cSBarry Smith     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
119357b952d6SSatish Balay   }
119457b952d6SSatish Balay 
119557b952d6SSatish Balay   if (size == 1) {
1196873048abSBarry Smith     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
119757b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1198d64ed03dSBarry Smith   } else {
119957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
120057b952d6SSatish Balay     Mat         A;
120157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1202b24ad042SBarry Smith     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
12033eda8832SBarry Smith     MatScalar   *a;
120457b952d6SSatish Balay 
1205f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1206f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
120757b952d6SSatish Balay     if (!rank) {
1208f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1209d64ed03dSBarry Smith     } else {
1210f204ca49SKris Buschelman       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
121157b952d6SSatish Balay     }
1212f204ca49SKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1213521d7252SBarry Smith     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
121452e6d16bSBarry Smith     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
121557b952d6SSatish Balay 
121657b952d6SSatish Balay     /* copy over the A part */
121757b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->A->data;
121857b952d6SSatish Balay     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1219b24ad042SBarry Smith     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
122057b952d6SSatish Balay 
122157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
122257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
122357b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
122457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
122557b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
122657b952d6SSatish Balay         for (k=0; k<bs; k++) {
122793fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1228cee3aa6bSSatish Balay           col++; a += bs;
122957b952d6SSatish Balay         }
123057b952d6SSatish Balay       }
123157b952d6SSatish Balay     }
123257b952d6SSatish Balay     /* copy over the B part */
123357b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
123457b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
123557b952d6SSatish Balay     for (i=0; i<mbs; i++) {
123657b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
123757b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
123857b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
123957b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
124057b952d6SSatish Balay         for (k=0; k<bs; k++) {
124193fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1242cee3aa6bSSatish Balay           col++; a += bs;
124357b952d6SSatish Balay         }
124457b952d6SSatish Balay       }
124557b952d6SSatish Balay     }
1246606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
12476d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
12486d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124955843e3eSBarry Smith     /*
125055843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1251b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
125255843e3eSBarry Smith     */
1253b0a32e0cSBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1254f14a1c24SBarry Smith     if (!rank) {
1255e36acaf3SBarry Smith       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
12566831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
125757b952d6SSatish Balay     }
1258b0a32e0cSBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
125957b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
126057b952d6SSatish Balay   }
12613a40ed3dSBarry Smith   PetscFunctionReturn(0);
126257b952d6SSatish Balay }
126357b952d6SSatish Balay 
12644a2ae208SSatish Balay #undef __FUNCT__
12654a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ"
1266dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
126757b952d6SSatish Balay {
1268dfbe8321SBarry Smith   PetscErrorCode ierr;
126932077d6dSBarry Smith   PetscTruth     iascii,isdraw,issocket,isbinary;
127057b952d6SSatish Balay 
1271d64ed03dSBarry Smith   PetscFunctionBegin;
127232077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1273fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1274b0a32e0cSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1275fb9695e5SSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
127632077d6dSBarry Smith   if (iascii || isdraw || issocket || isbinary) {
12777b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
12785cd90555SBarry Smith   } else {
127979a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
128057b952d6SSatish Balay   }
12813a40ed3dSBarry Smith   PetscFunctionReturn(0);
128257b952d6SSatish Balay }
128357b952d6SSatish Balay 
12844a2ae208SSatish Balay #undef __FUNCT__
12854a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ"
1286dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
128779bdfe76SSatish Balay {
128879bdfe76SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1289dfbe8321SBarry Smith   PetscErrorCode ierr;
129079bdfe76SSatish Balay 
1291d64ed03dSBarry Smith   PetscFunctionBegin;
1292aa482453SBarry Smith #if defined(PETSC_USE_LOG)
129377431f27SBarry Smith   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
129479bdfe76SSatish Balay #endif
12958798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
12968798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1297606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
129879bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
129979bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1300aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
13010f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
130248e59246SSatish Balay #else
1303606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
130448e59246SSatish Balay #endif
1305606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1306606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1307606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1308606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1309606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1310606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
13116fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
13126fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
13136fa18ffdSBarry Smith #endif
1314606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1315901853e0SKris Buschelman 
1316901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1317901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1318901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1319901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1320aac34f13SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1321901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1322901853e0SKris Buschelman   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
13233a40ed3dSBarry Smith   PetscFunctionReturn(0);
132479bdfe76SSatish Balay }
132579bdfe76SSatish Balay 
13264a2ae208SSatish Balay #undef __FUNCT__
13274a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ"
1328dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1329cee3aa6bSSatish Balay {
1330cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331dfbe8321SBarry Smith   PetscErrorCode ierr;
1332b24ad042SBarry Smith   PetscInt       nt;
1333cee3aa6bSSatish Balay 
1334d64ed03dSBarry Smith   PetscFunctionBegin;
1335e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1336273d9f13SBarry Smith   if (nt != A->n) {
133729bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
133847b4a8eaSLois Curfman McInnes   }
1339e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1340273d9f13SBarry Smith   if (nt != A->m) {
134129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
134247b4a8eaSLois Curfman McInnes   }
134343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1344f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
134543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1346f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
134743a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
13483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1349cee3aa6bSSatish Balay }
1350cee3aa6bSSatish Balay 
13514a2ae208SSatish Balay #undef __FUNCT__
13524a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1353dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1354cee3aa6bSSatish Balay {
1355cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1356dfbe8321SBarry Smith   PetscErrorCode ierr;
1357d64ed03dSBarry Smith 
1358d64ed03dSBarry Smith   PetscFunctionBegin;
135943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1360f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
136143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1362f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
13633a40ed3dSBarry Smith   PetscFunctionReturn(0);
1364cee3aa6bSSatish Balay }
1365cee3aa6bSSatish Balay 
13664a2ae208SSatish Balay #undef __FUNCT__
13674a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1368dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1369cee3aa6bSSatish Balay {
1370cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1371dfbe8321SBarry Smith   PetscErrorCode ierr;
1372a5ff213dSBarry Smith   PetscTruth     merged;
1373cee3aa6bSSatish Balay 
1374d64ed03dSBarry Smith   PetscFunctionBegin;
1375a5ff213dSBarry Smith   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1376cee3aa6bSSatish Balay   /* do nondiagonal part */
13777c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1378a5ff213dSBarry Smith   if (!merged) {
1379cee3aa6bSSatish Balay     /* send it on its way */
1380537820f0SBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1381cee3aa6bSSatish Balay     /* do local part */
13827c922b88SBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1383cee3aa6bSSatish Balay     /* receive remote parts: note this assumes the values are not actually */
1384a5ff213dSBarry Smith     /* inserted in yy until the next line */
1385639f9d9dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1386a5ff213dSBarry Smith   } else {
1387a5ff213dSBarry Smith     /* do local part */
1388a5ff213dSBarry Smith     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1389a5ff213dSBarry Smith     /* send it on its way */
1390a5ff213dSBarry Smith     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1391a5ff213dSBarry Smith     /* values actually were received in the Begin() but we need to call this nop */
1392a5ff213dSBarry Smith     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1393a5ff213dSBarry Smith   }
13943a40ed3dSBarry Smith   PetscFunctionReturn(0);
1395cee3aa6bSSatish Balay }
1396cee3aa6bSSatish Balay 
13974a2ae208SSatish Balay #undef __FUNCT__
13984a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1399dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1400cee3aa6bSSatish Balay {
1401cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402dfbe8321SBarry Smith   PetscErrorCode ierr;
1403cee3aa6bSSatish Balay 
1404d64ed03dSBarry Smith   PetscFunctionBegin;
1405cee3aa6bSSatish Balay   /* do nondiagonal part */
14067c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1407cee3aa6bSSatish Balay   /* send it on its way */
1408537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1409cee3aa6bSSatish Balay   /* do local part */
14107c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1411cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1412cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1413cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1414537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
14153a40ed3dSBarry Smith   PetscFunctionReturn(0);
1416cee3aa6bSSatish Balay }
1417cee3aa6bSSatish Balay 
1418cee3aa6bSSatish Balay /*
1419cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1420cee3aa6bSSatish Balay    diagonal block
1421cee3aa6bSSatish Balay */
14224a2ae208SSatish Balay #undef __FUNCT__
14234a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1424dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1425cee3aa6bSSatish Balay {
1426cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1427dfbe8321SBarry Smith   PetscErrorCode ierr;
1428d64ed03dSBarry Smith 
1429d64ed03dSBarry Smith   PetscFunctionBegin;
1430273d9f13SBarry Smith   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
14313a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
14323a40ed3dSBarry Smith   PetscFunctionReturn(0);
1433cee3aa6bSSatish Balay }
1434cee3aa6bSSatish Balay 
14354a2ae208SSatish Balay #undef __FUNCT__
14364a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ"
1437dfbe8321SBarry Smith PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1438cee3aa6bSSatish Balay {
1439cee3aa6bSSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1440dfbe8321SBarry Smith   PetscErrorCode ierr;
1441d64ed03dSBarry Smith 
1442d64ed03dSBarry Smith   PetscFunctionBegin;
1443cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1444cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
14453a40ed3dSBarry Smith   PetscFunctionReturn(0);
1446cee3aa6bSSatish Balay }
1447026e39d0SSatish Balay 
14484a2ae208SSatish Balay #undef __FUNCT__
14494a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ"
1450b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1451acdf5bf4SSatish Balay {
1452acdf5bf4SSatish Balay   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
145387828ca2SBarry Smith   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
14546849ba73SBarry Smith   PetscErrorCode ierr;
1455521d7252SBarry Smith   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1456b24ad042SBarry Smith   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1457b24ad042SBarry Smith   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1458acdf5bf4SSatish Balay 
1459d64ed03dSBarry Smith   PetscFunctionBegin;
1460abc0a331SBarry Smith   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1461acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1462acdf5bf4SSatish Balay 
1463acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1464acdf5bf4SSatish Balay     /*
1465acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1466acdf5bf4SSatish Balay     */
1467acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1468b24ad042SBarry Smith     PetscInt     max = 1,mbs = mat->mbs,tmp;
1469bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1470acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1471acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1472acdf5bf4SSatish Balay     }
1473b24ad042SBarry Smith     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1474b24ad042SBarry Smith     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1475acdf5bf4SSatish Balay   }
1476acdf5bf4SSatish Balay 
147729bbc08cSBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1478d9d09a02SSatish Balay   lrow = row - brstart;
1479acdf5bf4SSatish Balay 
1480acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1481acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1482acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1483f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1484f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1485acdf5bf4SSatish Balay   nztot = nzA + nzB;
1486acdf5bf4SSatish Balay 
1487acdf5bf4SSatish Balay   cmap  = mat->garray;
1488acdf5bf4SSatish Balay   if (v  || idx) {
1489acdf5bf4SSatish Balay     if (nztot) {
1490acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1491b24ad042SBarry Smith       PetscInt imark = -1;
1492acdf5bf4SSatish Balay       if (v) {
1493acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1494acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1495d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1496acdf5bf4SSatish Balay           else break;
1497acdf5bf4SSatish Balay         }
1498acdf5bf4SSatish Balay         imark = i;
1499acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1500acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1501acdf5bf4SSatish Balay       }
1502acdf5bf4SSatish Balay       if (idx) {
1503acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1504acdf5bf4SSatish Balay         if (imark > -1) {
1505acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1506bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1507acdf5bf4SSatish Balay           }
1508acdf5bf4SSatish Balay         } else {
1509acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1510d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1511d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1512acdf5bf4SSatish Balay             else break;
1513acdf5bf4SSatish Balay           }
1514acdf5bf4SSatish Balay           imark = i;
1515acdf5bf4SSatish Balay         }
1516d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1517d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1518acdf5bf4SSatish Balay       }
1519d64ed03dSBarry Smith     } else {
1520d212a18eSSatish Balay       if (idx) *idx = 0;
1521d212a18eSSatish Balay       if (v)   *v   = 0;
1522d212a18eSSatish Balay     }
1523acdf5bf4SSatish Balay   }
1524acdf5bf4SSatish Balay   *nz = nztot;
1525f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1526f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
15273a40ed3dSBarry Smith   PetscFunctionReturn(0);
1528acdf5bf4SSatish Balay }
1529acdf5bf4SSatish Balay 
15304a2ae208SSatish Balay #undef __FUNCT__
15314a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1532b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533acdf5bf4SSatish Balay {
1534acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1535d64ed03dSBarry Smith 
1536d64ed03dSBarry Smith   PetscFunctionBegin;
1537abc0a331SBarry Smith   if (!baij->getrowactive) {
153829bbc08cSBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1539acdf5bf4SSatish Balay   }
1540acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
15413a40ed3dSBarry Smith   PetscFunctionReturn(0);
1542acdf5bf4SSatish Balay }
1543acdf5bf4SSatish Balay 
15444a2ae208SSatish Balay #undef __FUNCT__
15454a2ae208SSatish Balay #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1546dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
154758667388SSatish Balay {
154858667388SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1549dfbe8321SBarry Smith   PetscErrorCode ierr;
1550d64ed03dSBarry Smith 
1551d64ed03dSBarry Smith   PetscFunctionBegin;
155258667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
155358667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
15543a40ed3dSBarry Smith   PetscFunctionReturn(0);
155558667388SSatish Balay }
15560ac07820SSatish Balay 
15574a2ae208SSatish Balay #undef __FUNCT__
15584a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1559dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
15600ac07820SSatish Balay {
15614e220ebcSLois Curfman McInnes   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
15624e220ebcSLois Curfman McInnes   Mat            A = a->A,B = a->B;
1563dfbe8321SBarry Smith   PetscErrorCode ierr;
1564329f5518SBarry Smith   PetscReal      isend[5],irecv[5];
15650ac07820SSatish Balay 
1566d64ed03dSBarry Smith   PetscFunctionBegin;
1567521d7252SBarry Smith   info->block_size     = (PetscReal)matin->bs;
15684e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
15690e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1570de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
15714e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
15720e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1573de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15740ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15754e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15764e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15774e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15784e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15794e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15800ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1581d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
15824e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15834e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15844e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15854e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15864e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15870ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1588d7d1e502SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
15894e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15904e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15914e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15924e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15934e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1594d41123aaSBarry Smith   } else {
159577431f27SBarry Smith     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
15960ac07820SSatish Balay   }
1597f6275e2eSBarry Smith   info->rows_global       = (PetscReal)A->M;
1598f6275e2eSBarry Smith   info->columns_global    = (PetscReal)A->N;
1599f6275e2eSBarry Smith   info->rows_local        = (PetscReal)A->m;
1600f6275e2eSBarry Smith   info->columns_local     = (PetscReal)A->N;
16014e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16024e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16034e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16043a40ed3dSBarry Smith   PetscFunctionReturn(0);
16050ac07820SSatish Balay }
16060ac07820SSatish Balay 
16074a2ae208SSatish Balay #undef __FUNCT__
16084a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ"
1609dfbe8321SBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
161058667388SSatish Balay {
161158667388SSatish Balay   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1612dfbe8321SBarry Smith   PetscErrorCode ierr;
161358667388SSatish Balay 
1614d64ed03dSBarry Smith   PetscFunctionBegin;
161512c028f9SKris Buschelman   switch (op) {
161612c028f9SKris Buschelman   case MAT_NO_NEW_NONZERO_LOCATIONS:
161712c028f9SKris Buschelman   case MAT_YES_NEW_NONZERO_LOCATIONS:
161812c028f9SKris Buschelman   case MAT_COLUMNS_UNSORTED:
161912c028f9SKris Buschelman   case MAT_COLUMNS_SORTED:
162012c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
162112c028f9SKris Buschelman   case MAT_KEEP_ZEROED_ROWS:
162212c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
162398305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
162498305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
162512c028f9SKris Buschelman     break;
162612c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
16277c922b88SBarry Smith     a->roworiented = PETSC_TRUE;
162898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
162998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
163012c028f9SKris Buschelman     break;
163112c028f9SKris Buschelman   case MAT_ROWS_SORTED:
163212c028f9SKris Buschelman   case MAT_ROWS_UNSORTED:
163312c028f9SKris Buschelman   case MAT_YES_NEW_DIAGONALS:
163463ba0a88SBarry Smith     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
163512c028f9SKris Buschelman     break;
163612c028f9SKris Buschelman   case MAT_COLUMN_ORIENTED:
16377c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
163898305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
163998305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
164012c028f9SKris Buschelman     break;
164112c028f9SKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
16427c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
164312c028f9SKris Buschelman     break;
164412c028f9SKris Buschelman   case MAT_NO_NEW_DIAGONALS:
164529bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
164612c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
16477c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
164812c028f9SKris Buschelman     break;
164977e54ba9SKris Buschelman   case MAT_SYMMETRIC:
165077e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16512188ac68SBarry Smith   case MAT_HERMITIAN:
16522188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
16532188ac68SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
16542188ac68SBarry Smith     break;
16559a4540c5SBarry Smith   case MAT_NOT_SYMMETRIC:
16569a4540c5SBarry Smith   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
16579a4540c5SBarry Smith   case MAT_NOT_HERMITIAN:
16589a4540c5SBarry Smith   case MAT_NOT_SYMMETRY_ETERNAL:
165977e54ba9SKris Buschelman     break;
166012c028f9SKris Buschelman   default:
166129bbc08cSBarry Smith     SETERRQ(PETSC_ERR_SUP,"unknown option");
1662d64ed03dSBarry Smith   }
16633a40ed3dSBarry Smith   PetscFunctionReturn(0);
166458667388SSatish Balay }
166558667388SSatish Balay 
16664a2ae208SSatish Balay #undef __FUNCT__
16674a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_MPIBAIJ("
1668dfbe8321SBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
16690ac07820SSatish Balay {
16700ac07820SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
16710ac07820SSatish Balay   Mat_SeqBAIJ    *Aloc;
16720ac07820SSatish Balay   Mat            B;
1673dfbe8321SBarry Smith   PetscErrorCode ierr;
1674b24ad042SBarry Smith   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1675521d7252SBarry Smith   PetscInt       bs=A->bs,mbs=baij->mbs;
16763eda8832SBarry Smith   MatScalar      *a;
16770ac07820SSatish Balay 
1678d64ed03dSBarry Smith   PetscFunctionBegin;
167929bbc08cSBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1680f204ca49SKris Buschelman   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1681f204ca49SKris Buschelman   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1682521d7252SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
16830ac07820SSatish Balay 
16840ac07820SSatish Balay   /* copy over the A part */
16850ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
16860ac07820SSatish Balay   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1687b24ad042SBarry Smith   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
16880ac07820SSatish Balay 
16890ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16900ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16910ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16920ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16930ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16940ac07820SSatish Balay       for (k=0; k<bs; k++) {
169593fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16960ac07820SSatish Balay         col++; a += bs;
16970ac07820SSatish Balay       }
16980ac07820SSatish Balay     }
16990ac07820SSatish Balay   }
17000ac07820SSatish Balay   /* copy over the B part */
17010ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
17020ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
17030ac07820SSatish Balay   for (i=0; i<mbs; i++) {
17040ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
17050ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
17060ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
17070ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
17080ac07820SSatish Balay       for (k=0; k<bs; k++) {
170993fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
17100ac07820SSatish Balay         col++; a += bs;
17110ac07820SSatish Balay       }
17120ac07820SSatish Balay     }
17130ac07820SSatish Balay   }
1714606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
17150ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17160ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17170ac07820SSatish Balay 
17187c922b88SBarry Smith   if (matout) {
17190ac07820SSatish Balay     *matout = B;
17200ac07820SSatish Balay   } else {
1721273d9f13SBarry Smith     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
17220ac07820SSatish Balay   }
17233a40ed3dSBarry Smith   PetscFunctionReturn(0);
17240ac07820SSatish Balay }
17250e95ebc0SSatish Balay 
17264a2ae208SSatish Balay #undef __FUNCT__
17274a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1728dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
17290e95ebc0SSatish Balay {
173036c4a09eSSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
173136c4a09eSSatish Balay   Mat            a = baij->A,b = baij->B;
1732dfbe8321SBarry Smith   PetscErrorCode ierr;
1733b24ad042SBarry Smith   PetscInt       s1,s2,s3;
17340e95ebc0SSatish Balay 
1735d64ed03dSBarry Smith   PetscFunctionBegin;
173636c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
173736c4a09eSSatish Balay   if (rr) {
173836c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
173929bbc08cSBarry Smith     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
174036c4a09eSSatish Balay     /* Overlap communication with computation. */
174136c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
174236c4a09eSSatish Balay   }
17430e95ebc0SSatish Balay   if (ll) {
17440e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
174529bbc08cSBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1746a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
17470e95ebc0SSatish Balay   }
174836c4a09eSSatish Balay   /* scale  the diagonal block */
174936c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
175036c4a09eSSatish Balay 
175136c4a09eSSatish Balay   if (rr) {
175236c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
175336c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1754a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
175536c4a09eSSatish Balay   }
175636c4a09eSSatish Balay 
17573a40ed3dSBarry Smith   PetscFunctionReturn(0);
17580e95ebc0SSatish Balay }
17590e95ebc0SSatish Balay 
17604a2ae208SSatish Balay #undef __FUNCT__
17614a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1762dfbe8321SBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
17630ac07820SSatish Balay {
17640ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
17656849ba73SBarry Smith   PetscErrorCode ierr;
1766b24ad042SBarry Smith   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1767b24ad042SBarry Smith   PetscInt       i,N,*rows,*owners = l->rowners;
1768b24ad042SBarry Smith   PetscInt       *nprocs,j,idx,nsends,row;
1769b24ad042SBarry Smith   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
17706543fbbaSBarry Smith   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1771521d7252SBarry Smith   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
17720ac07820SSatish Balay   MPI_Comm       comm = A->comm;
17730ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17740ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17750ac07820SSatish Balay   IS             istmp;
17766543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17776543fbbaSBarry Smith   PetscTruth     found = PETSC_FALSE;
17786543fbbaSBarry Smith #endif
17790ac07820SSatish Balay 
1780d64ed03dSBarry Smith   PetscFunctionBegin;
1781f14a1c24SBarry Smith   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
17820ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
17830ac07820SSatish Balay 
17840ac07820SSatish Balay   /*  first count number of contributors to each processor */
1785b24ad042SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1786b24ad042SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1787b24ad042SBarry Smith   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
17886543fbbaSBarry Smith   j = 0;
17890ac07820SSatish Balay   for (i=0; i<N; i++) {
17906543fbbaSBarry Smith     if (lastidx > (idx = rows[i])) j = 0;
17916543fbbaSBarry Smith     lastidx = idx;
17926543fbbaSBarry Smith     for (; j<size; j++) {
17930ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17946543fbbaSBarry Smith         nprocs[2*j]++;
17956543fbbaSBarry Smith         nprocs[2*j+1] = 1;
17966543fbbaSBarry Smith         owner[i] = j;
17976543fbbaSBarry Smith #if defined(PETSC_DEBUG)
17986543fbbaSBarry Smith         found = PETSC_TRUE;
17996543fbbaSBarry Smith #endif
18006543fbbaSBarry Smith         break;
18010ac07820SSatish Balay       }
18020ac07820SSatish Balay     }
18036543fbbaSBarry Smith #if defined(PETSC_DEBUG)
180429bbc08cSBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
18056543fbbaSBarry Smith     found = PETSC_FALSE;
18066543fbbaSBarry Smith #endif
18070ac07820SSatish Balay   }
1808c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
18090ac07820SSatish Balay 
18100ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
1811c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
18120ac07820SSatish Balay 
18130ac07820SSatish Balay   /* post receives:   */
1814b24ad042SBarry Smith   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1815b0a32e0cSBarry Smith   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
18160ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1817b24ad042SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
18180ac07820SSatish Balay   }
18190ac07820SSatish Balay 
18200ac07820SSatish Balay   /* do sends:
18210ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
18220ac07820SSatish Balay      the ith processor
18230ac07820SSatish Balay   */
1824b24ad042SBarry Smith   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1825b0a32e0cSBarry Smith   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1826b24ad042SBarry Smith   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
18270ac07820SSatish Balay   starts[0]  = 0;
1828c1dc657dSBarry Smith   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
18290ac07820SSatish Balay   for (i=0; i<N; i++) {
18300ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
18310ac07820SSatish Balay   }
18326831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
18330ac07820SSatish Balay 
18340ac07820SSatish Balay   starts[0] = 0;
1835c1dc657dSBarry Smith   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
18360ac07820SSatish Balay   count = 0;
18370ac07820SSatish Balay   for (i=0; i<size; i++) {
1838c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
1839b24ad042SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
18400ac07820SSatish Balay     }
18410ac07820SSatish Balay   }
1842606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
18430ac07820SSatish Balay 
18440ac07820SSatish Balay   base = owners[rank]*bs;
18450ac07820SSatish Balay 
18460ac07820SSatish Balay   /*  wait on receives */
1847b24ad042SBarry Smith   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
18480ac07820SSatish Balay   source = lens + nrecvs;
18490ac07820SSatish Balay   count  = nrecvs; slen = 0;
18500ac07820SSatish Balay   while (count) {
1851ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
18520ac07820SSatish Balay     /* unpack receives into our local space */
1853b24ad042SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
18540ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
18550ac07820SSatish Balay     lens[imdex]    = n;
18560ac07820SSatish Balay     slen          += n;
18570ac07820SSatish Balay     count--;
18580ac07820SSatish Balay   }
1859606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
18600ac07820SSatish Balay 
18610ac07820SSatish Balay   /* move the data into the send scatter */
1862b24ad042SBarry Smith   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
18630ac07820SSatish Balay   count = 0;
18640ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
18650ac07820SSatish Balay     values = rvalues + i*nmax;
18660ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
18670ac07820SSatish Balay       lrows[count++] = values[j] - base;
18680ac07820SSatish Balay     }
18690ac07820SSatish Balay   }
1870606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1871606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1872606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1873606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
18740ac07820SSatish Balay 
18750ac07820SSatish Balay   /* actually zap the local rows */
1876029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
187752e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1878a07cd24cSSatish Balay 
187972dacd9aSBarry Smith   /*
188072dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
188172dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
188272dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
188372dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
188472dacd9aSBarry Smith 
188572dacd9aSBarry Smith        Contributed by: Mathew Knepley
188672dacd9aSBarry Smith   */
18879c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
18886fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
18899c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
18906fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
18919c957beeSSatish Balay   } else if (diag) {
18926fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1893fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
189429bbc08cSBarry Smith       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1895fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18966525c446SSatish Balay     }
1897a07cd24cSSatish Balay     for (i=0; i<slen; i++) {
1898a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
18993f11ea53SBarry Smith       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1900a07cd24cSSatish Balay     }
1901a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19039c957beeSSatish Balay   } else {
19046fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1905a07cd24cSSatish Balay   }
19069c957beeSSatish Balay 
19079c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1908606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1909a07cd24cSSatish Balay 
19100ac07820SSatish Balay   /* wait on sends */
19110ac07820SSatish Balay   if (nsends) {
191282502324SSatish Balay     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1913ca161407SBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1914606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
19150ac07820SSatish Balay   }
1916606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1917606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
19180ac07820SSatish Balay 
19193a40ed3dSBarry Smith   PetscFunctionReturn(0);
19200ac07820SSatish Balay }
192172dacd9aSBarry Smith 
19224a2ae208SSatish Balay #undef __FUNCT__
19234a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1924dfbe8321SBarry Smith PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1925ba4ca20aSSatish Balay {
1926ba4ca20aSSatish Balay   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
192725fdafccSSatish Balay   MPI_Comm          comm = A->comm;
1928b24ad042SBarry Smith   static PetscTruth called = PETSC_FALSE;
1929dfbe8321SBarry Smith   PetscErrorCode    ierr;
1930ba4ca20aSSatish Balay 
1931d64ed03dSBarry Smith   PetscFunctionBegin;
19323a40ed3dSBarry Smith   if (!a->rank) {
19333a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
193425fdafccSSatish Balay   }
1935b24ad042SBarry Smith   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1936d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1937d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
19383a40ed3dSBarry Smith   PetscFunctionReturn(0);
1939ba4ca20aSSatish Balay }
19400ac07820SSatish Balay 
19414a2ae208SSatish Balay #undef __FUNCT__
19424a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1943dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1944bb5a7306SBarry Smith {
1945bb5a7306SBarry Smith   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1946dfbe8321SBarry Smith   PetscErrorCode ierr;
1947d64ed03dSBarry Smith 
1948d64ed03dSBarry Smith   PetscFunctionBegin;
1949bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
19503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1951bb5a7306SBarry Smith }
1952bb5a7306SBarry Smith 
19536849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
19540ac07820SSatish Balay 
19554a2ae208SSatish Balay #undef __FUNCT__
19564a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ"
1957dfbe8321SBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
19587fc3c18eSBarry Smith {
19597fc3c18eSBarry Smith   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
19607fc3c18eSBarry Smith   Mat            a,b,c,d;
19617fc3c18eSBarry Smith   PetscTruth     flg;
1962dfbe8321SBarry Smith   PetscErrorCode ierr;
19637fc3c18eSBarry Smith 
19647fc3c18eSBarry Smith   PetscFunctionBegin;
19657fc3c18eSBarry Smith   a = matA->A; b = matA->B;
19667fc3c18eSBarry Smith   c = matB->A; d = matB->B;
19677fc3c18eSBarry Smith 
19687fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1969abc0a331SBarry Smith   if (flg) {
19707fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
19717fc3c18eSBarry Smith   }
19727fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
19737fc3c18eSBarry Smith   PetscFunctionReturn(0);
19747fc3c18eSBarry Smith }
19757fc3c18eSBarry Smith 
1976273d9f13SBarry Smith 
19774a2ae208SSatish Balay #undef __FUNCT__
19784a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1979dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1980273d9f13SBarry Smith {
1981dfbe8321SBarry Smith   PetscErrorCode ierr;
1982273d9f13SBarry Smith 
1983273d9f13SBarry Smith   PetscFunctionBegin;
1984273d9f13SBarry Smith   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1985273d9f13SBarry Smith   PetscFunctionReturn(0);
1986273d9f13SBarry Smith }
1987273d9f13SBarry Smith 
198879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1989cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1990cc2dc46cSBarry Smith        MatSetValues_MPIBAIJ,
1991cc2dc46cSBarry Smith        MatGetRow_MPIBAIJ,
1992cc2dc46cSBarry Smith        MatRestoreRow_MPIBAIJ,
1993cc2dc46cSBarry Smith        MatMult_MPIBAIJ,
199497304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ,
19957c922b88SBarry Smith        MatMultTranspose_MPIBAIJ,
19967c922b88SBarry Smith        MatMultTransposeAdd_MPIBAIJ,
1997cc2dc46cSBarry Smith        0,
1998cc2dc46cSBarry Smith        0,
1999cc2dc46cSBarry Smith        0,
200097304618SKris Buschelman /*10*/ 0,
2001cc2dc46cSBarry Smith        0,
2002cc2dc46cSBarry Smith        0,
2003cc2dc46cSBarry Smith        0,
2004cc2dc46cSBarry Smith        MatTranspose_MPIBAIJ,
200597304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ,
20067fc3c18eSBarry Smith        MatEqual_MPIBAIJ,
2007cc2dc46cSBarry Smith        MatGetDiagonal_MPIBAIJ,
2008cc2dc46cSBarry Smith        MatDiagonalScale_MPIBAIJ,
2009cc2dc46cSBarry Smith        MatNorm_MPIBAIJ,
201097304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ,
2011cc2dc46cSBarry Smith        MatAssemblyEnd_MPIBAIJ,
2012cc2dc46cSBarry Smith        0,
2013cc2dc46cSBarry Smith        MatSetOption_MPIBAIJ,
2014cc2dc46cSBarry Smith        MatZeroEntries_MPIBAIJ,
201597304618SKris Buschelman /*25*/ MatZeroRows_MPIBAIJ,
2016cc2dc46cSBarry Smith        0,
2017cc2dc46cSBarry Smith        0,
2018cc2dc46cSBarry Smith        0,
2019cc2dc46cSBarry Smith        0,
202097304618SKris Buschelman /*30*/ MatSetUpPreallocation_MPIBAIJ,
2021273d9f13SBarry Smith        0,
2022cc2dc46cSBarry Smith        0,
2023cc2dc46cSBarry Smith        0,
2024cc2dc46cSBarry Smith        0,
202597304618SKris Buschelman /*35*/ MatDuplicate_MPIBAIJ,
2026cc2dc46cSBarry Smith        0,
2027cc2dc46cSBarry Smith        0,
2028cc2dc46cSBarry Smith        0,
2029cc2dc46cSBarry Smith        0,
203097304618SKris Buschelman /*40*/ 0,
2031cc2dc46cSBarry Smith        MatGetSubMatrices_MPIBAIJ,
2032cc2dc46cSBarry Smith        MatIncreaseOverlap_MPIBAIJ,
2033cc2dc46cSBarry Smith        MatGetValues_MPIBAIJ,
2034cc2dc46cSBarry Smith        0,
203597304618SKris Buschelman /*45*/ MatPrintHelp_MPIBAIJ,
2036cc2dc46cSBarry Smith        MatScale_MPIBAIJ,
2037cc2dc46cSBarry Smith        0,
2038cc2dc46cSBarry Smith        0,
2039cc2dc46cSBarry Smith        0,
2040521d7252SBarry Smith /*50*/ 0,
2041cc2dc46cSBarry Smith        0,
2042cc2dc46cSBarry Smith        0,
2043cc2dc46cSBarry Smith        0,
2044cc2dc46cSBarry Smith        0,
204597304618SKris Buschelman /*55*/ 0,
2046cc2dc46cSBarry Smith        0,
2047cc2dc46cSBarry Smith        MatSetUnfactored_MPIBAIJ,
2048cc2dc46cSBarry Smith        0,
2049cc2dc46cSBarry Smith        MatSetValuesBlocked_MPIBAIJ,
205097304618SKris Buschelman /*60*/ 0,
2051f14a1c24SBarry Smith        MatDestroy_MPIBAIJ,
2052f14a1c24SBarry Smith        MatView_MPIBAIJ,
20538a124369SBarry Smith        MatGetPetscMaps_Petsc,
20547843d17aSBarry Smith        0,
205597304618SKris Buschelman /*65*/ 0,
20567843d17aSBarry Smith        0,
20577843d17aSBarry Smith        0,
20587843d17aSBarry Smith        0,
20597843d17aSBarry Smith        0,
206097304618SKris Buschelman /*70*/ MatGetRowMax_MPIBAIJ,
20617843d17aSBarry Smith        0,
206297304618SKris Buschelman        0,
206397304618SKris Buschelman        0,
206497304618SKris Buschelman        0,
206597304618SKris Buschelman /*75*/ 0,
206697304618SKris Buschelman        0,
206797304618SKris Buschelman        0,
206897304618SKris Buschelman        0,
206997304618SKris Buschelman        0,
207097304618SKris Buschelman /*80*/ 0,
207197304618SKris Buschelman        0,
207297304618SKris Buschelman        0,
207397304618SKris Buschelman        0,
2074865e5f61SKris Buschelman        MatLoad_MPIBAIJ,
2075865e5f61SKris Buschelman /*85*/ 0,
2076865e5f61SKris Buschelman        0,
2077865e5f61SKris Buschelman        0,
2078865e5f61SKris Buschelman        0,
2079865e5f61SKris Buschelman        0,
2080865e5f61SKris Buschelman /*90*/ 0,
2081865e5f61SKris Buschelman        0,
2082865e5f61SKris Buschelman        0,
2083865e5f61SKris Buschelman        0,
2084865e5f61SKris Buschelman        0,
2085865e5f61SKris Buschelman /*95*/ 0,
2086865e5f61SKris Buschelman        0,
2087865e5f61SKris Buschelman        0,
2088865e5f61SKris Buschelman        0};
208979bdfe76SSatish Balay 
20905ef9f2a5SBarry Smith 
2091e18c124aSSatish Balay EXTERN_C_BEGIN
20924a2ae208SSatish Balay #undef __FUNCT__
20934a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2094dfbe8321SBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20955ef9f2a5SBarry Smith {
20965ef9f2a5SBarry Smith   PetscFunctionBegin;
20975ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20985ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
20995ef9f2a5SBarry Smith   PetscFunctionReturn(0);
21005ef9f2a5SBarry Smith }
2101e18c124aSSatish Balay EXTERN_C_END
210279bdfe76SSatish Balay 
2103273d9f13SBarry Smith EXTERN_C_BEGIN
2104ceb03754SKris Buschelman extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2105d94109b8SHong Zhang EXTERN_C_END
2106d94109b8SHong Zhang 
2107aac34f13SBarry Smith #undef __FUNCT__
2108aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2109aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2110aac34f13SBarry Smith {
2111aac34f13SBarry Smith   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2112aac34f13SBarry Smith   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2113aac34f13SBarry Smith   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2114aac34f13SBarry Smith   const PetscInt *JJ;
2115aac34f13SBarry Smith   PetscScalar    *values;
2116aac34f13SBarry Smith   PetscErrorCode ierr;
2117aac34f13SBarry Smith 
2118aac34f13SBarry Smith   PetscFunctionBegin;
2119aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2120aac34f13SBarry Smith   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2121aac34f13SBarry Smith #endif
2122aac34f13SBarry Smith   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2123aac34f13SBarry Smith   o_nnz = d_nnz + m;
2124aac34f13SBarry Smith 
2125aac34f13SBarry Smith   for (i=0; i<m; i++) {
2126aac34f13SBarry Smith     nnz     = I[i+1]- I[i];
2127aac34f13SBarry Smith     JJ      = J + I[i];
2128aac34f13SBarry Smith     nnz_max = PetscMax(nnz_max,nnz);
2129aac34f13SBarry Smith #if defined(PETSC_OPT_g)
2130aac34f13SBarry Smith     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2131aac34f13SBarry Smith #endif
2132aac34f13SBarry Smith     for (j=0; j<nnz; j++) {
2133aac34f13SBarry Smith       if (*JJ >= cstart) break;
2134aac34f13SBarry Smith       JJ++;
2135aac34f13SBarry Smith     }
2136aac34f13SBarry Smith     d = 0;
2137aac34f13SBarry Smith     for (; j<nnz; j++) {
2138aac34f13SBarry Smith       if (*JJ++ >= cend) break;
2139aac34f13SBarry Smith       d++;
2140aac34f13SBarry Smith     }
2141aac34f13SBarry Smith     d_nnz[i] = d;
2142aac34f13SBarry Smith     o_nnz[i] = nnz - d;
2143aac34f13SBarry Smith   }
2144aac34f13SBarry Smith   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145aac34f13SBarry Smith   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2146aac34f13SBarry Smith 
2147aac34f13SBarry Smith   if (v) values = (PetscScalar*)v;
2148aac34f13SBarry Smith   else {
2149aac34f13SBarry Smith     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2150aac34f13SBarry Smith     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2151aac34f13SBarry Smith   }
2152aac34f13SBarry Smith 
2153aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2154aac34f13SBarry Smith   for (i=0; i<m; i++) {
2155aac34f13SBarry Smith     ii   = i + rstart;
2156aac34f13SBarry Smith     nnz  = I[i+1]- I[i];
2157aac34f13SBarry Smith     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2158aac34f13SBarry Smith   }
2159aac34f13SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160aac34f13SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161aac34f13SBarry Smith   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2162aac34f13SBarry Smith 
2163aac34f13SBarry Smith   if (!v) {
2164aac34f13SBarry Smith     ierr = PetscFree(values);CHKERRQ(ierr);
2165aac34f13SBarry Smith   }
2166aac34f13SBarry Smith   PetscFunctionReturn(0);
2167aac34f13SBarry Smith }
2168aac34f13SBarry Smith 
2169aac34f13SBarry Smith #undef __FUNCT__
2170aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2171aac34f13SBarry Smith /*@C
2172aac34f13SBarry Smith    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2173aac34f13SBarry Smith    (the default parallel PETSc format).
2174aac34f13SBarry Smith 
2175aac34f13SBarry Smith    Collective on MPI_Comm
2176aac34f13SBarry Smith 
2177aac34f13SBarry Smith    Input Parameters:
2178aac34f13SBarry Smith +  A - the matrix
2179aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2180aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2181aac34f13SBarry Smith -  v - optional values in the matrix
2182aac34f13SBarry Smith 
2183aac34f13SBarry Smith    Level: developer
2184aac34f13SBarry Smith 
2185aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel
2186aac34f13SBarry Smith 
2187aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2188aac34f13SBarry Smith @*/
2189aac34f13SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2190aac34f13SBarry Smith {
2191aac34f13SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2192aac34f13SBarry Smith 
2193aac34f13SBarry Smith   PetscFunctionBegin;
2194aac34f13SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2195aac34f13SBarry Smith   if (f) {
2196aac34f13SBarry Smith     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2197aac34f13SBarry Smith   }
2198aac34f13SBarry Smith   PetscFunctionReturn(0);
2199aac34f13SBarry Smith }
2200aac34f13SBarry Smith 
2201d94109b8SHong Zhang EXTERN_C_BEGIN
22024a2ae208SSatish Balay #undef __FUNCT__
2203a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2204b24ad042SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2205a23d5eceSKris Buschelman {
2206a23d5eceSKris Buschelman   Mat_MPIBAIJ    *b;
2207dfbe8321SBarry Smith   PetscErrorCode ierr;
2208b24ad042SBarry Smith   PetscInt       i;
2209a23d5eceSKris Buschelman 
2210a23d5eceSKris Buschelman   PetscFunctionBegin;
2211a23d5eceSKris Buschelman   B->preallocated = PETSC_TRUE;
2212a23d5eceSKris Buschelman   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2213a23d5eceSKris Buschelman 
2214a23d5eceSKris Buschelman   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2215a23d5eceSKris Buschelman   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2216a23d5eceSKris Buschelman   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
221777431f27SBarry Smith   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
221877431f27SBarry Smith   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2219a23d5eceSKris Buschelman   if (d_nnz) {
2220a23d5eceSKris Buschelman   for (i=0; i<B->m/bs; i++) {
222177431f27SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2222a23d5eceSKris Buschelman     }
2223a23d5eceSKris Buschelman   }
2224a23d5eceSKris Buschelman   if (o_nnz) {
2225a23d5eceSKris Buschelman     for (i=0; i<B->m/bs; i++) {
222677431f27SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2227a23d5eceSKris Buschelman     }
2228a23d5eceSKris Buschelman   }
2229a23d5eceSKris Buschelman 
2230a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2231a23d5eceSKris Buschelman   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2232a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2233a23d5eceSKris Buschelman   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2234a23d5eceSKris Buschelman 
2235a23d5eceSKris Buschelman   b = (Mat_MPIBAIJ*)B->data;
2236521d7252SBarry Smith   B->bs  = bs;
2237a23d5eceSKris Buschelman   b->bs2 = bs*bs;
2238a23d5eceSKris Buschelman   b->mbs = B->m/bs;
2239a23d5eceSKris Buschelman   b->nbs = B->n/bs;
2240a23d5eceSKris Buschelman   b->Mbs = B->M/bs;
2241a23d5eceSKris Buschelman   b->Nbs = B->N/bs;
2242a23d5eceSKris Buschelman 
2243b24ad042SBarry Smith   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2244a23d5eceSKris Buschelman   b->rowners[0]    = 0;
2245a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2246a23d5eceSKris Buschelman     b->rowners[i] += b->rowners[i-1];
2247a23d5eceSKris Buschelman   }
2248a23d5eceSKris Buschelman   b->rstart    = b->rowners[b->rank];
2249a23d5eceSKris Buschelman   b->rend      = b->rowners[b->rank+1];
2250a23d5eceSKris Buschelman 
2251b24ad042SBarry Smith   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2252a23d5eceSKris Buschelman   b->cowners[0] = 0;
2253a23d5eceSKris Buschelman   for (i=2; i<=b->size; i++) {
2254a23d5eceSKris Buschelman     b->cowners[i] += b->cowners[i-1];
2255a23d5eceSKris Buschelman   }
2256a23d5eceSKris Buschelman   b->cstart    = b->cowners[b->rank];
2257a23d5eceSKris Buschelman   b->cend      = b->cowners[b->rank+1];
2258a23d5eceSKris Buschelman 
2259a23d5eceSKris Buschelman   for (i=0; i<=b->size; i++) {
2260a23d5eceSKris Buschelman     b->rowners_bs[i] = b->rowners[i]*bs;
2261a23d5eceSKris Buschelman   }
2262a23d5eceSKris Buschelman   b->rstart_bs = b->rstart*bs;
2263a23d5eceSKris Buschelman   b->rend_bs   = b->rend*bs;
2264a23d5eceSKris Buschelman   b->cstart_bs = b->cstart*bs;
2265a23d5eceSKris Buschelman   b->cend_bs   = b->cend*bs;
2266a23d5eceSKris Buschelman 
22679c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
22689c097c71SKris Buschelman   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2269c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
227052e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
22719c097c71SKris Buschelman   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
22729c097c71SKris Buschelman   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2273c60e587dSKris Buschelman   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
227452e6d16bSBarry Smith   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2275c60e587dSKris Buschelman 
2276a23d5eceSKris Buschelman   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2277a23d5eceSKris Buschelman 
2278a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2279a23d5eceSKris Buschelman }
2280a23d5eceSKris Buschelman EXTERN_C_END
2281a23d5eceSKris Buschelman 
2282a23d5eceSKris Buschelman EXTERN_C_BEGIN
2283dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2284dfbe8321SBarry Smith EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
228592b32695SKris Buschelman EXTERN_C_END
22865bf65638SKris Buschelman 
22870bad9183SKris Buschelman /*MC
2288fafad747SKris Buschelman    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
22890bad9183SKris Buschelman 
22900bad9183SKris Buschelman    Options Database Keys:
22910bad9183SKris Buschelman . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
22920bad9183SKris Buschelman 
22930bad9183SKris Buschelman   Level: beginner
22940bad9183SKris Buschelman 
22950bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ
22960bad9183SKris Buschelman M*/
22970bad9183SKris Buschelman 
229892b32695SKris Buschelman EXTERN_C_BEGIN
2299a23d5eceSKris Buschelman #undef __FUNCT__
23004a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ"
2301dfbe8321SBarry Smith PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2302273d9f13SBarry Smith {
2303273d9f13SBarry Smith   Mat_MPIBAIJ    *b;
2304dfbe8321SBarry Smith   PetscErrorCode ierr;
2305273d9f13SBarry Smith   PetscTruth     flg;
2306273d9f13SBarry Smith 
2307273d9f13SBarry Smith   PetscFunctionBegin;
230882502324SSatish Balay   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
230982502324SSatish Balay   B->data = (void*)b;
231082502324SSatish Balay 
2311273d9f13SBarry Smith   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2312273d9f13SBarry Smith   B->mapping    = 0;
2313273d9f13SBarry Smith   B->factor     = 0;
2314273d9f13SBarry Smith   B->assembled  = PETSC_FALSE;
2315273d9f13SBarry Smith 
2316273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
2317273d9f13SBarry Smith   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2318273d9f13SBarry Smith   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2319273d9f13SBarry Smith 
2320273d9f13SBarry Smith   /* build local table of row and column ownerships */
2321b24ad042SBarry Smith   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
232252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2323273d9f13SBarry Smith   b->cowners    = b->rowners + b->size + 2;
2324273d9f13SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2325273d9f13SBarry Smith 
2326273d9f13SBarry Smith   /* build cache for off array entries formed */
2327273d9f13SBarry Smith   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2328273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
2329273d9f13SBarry Smith   b->colmap      = PETSC_NULL;
2330273d9f13SBarry Smith   b->garray      = PETSC_NULL;
2331273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2332273d9f13SBarry Smith 
2333cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
2334273d9f13SBarry Smith   /* stuff for MatSetValues_XXX in single precision */
233564a35ccbSBarry Smith   b->setvalueslen     = 0;
2336273d9f13SBarry Smith   b->setvaluescopy    = PETSC_NULL;
2337273d9f13SBarry Smith #endif
2338273d9f13SBarry Smith 
2339273d9f13SBarry Smith   /* stuff used in block assembly */
2340273d9f13SBarry Smith   b->barray       = 0;
2341273d9f13SBarry Smith 
2342273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2343273d9f13SBarry Smith   b->lvec         = 0;
2344273d9f13SBarry Smith   b->Mvctx        = 0;
2345273d9f13SBarry Smith 
2346273d9f13SBarry Smith   /* stuff for MatGetRow() */
2347273d9f13SBarry Smith   b->rowindices   = 0;
2348273d9f13SBarry Smith   b->rowvalues    = 0;
2349273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2350273d9f13SBarry Smith 
2351273d9f13SBarry Smith   /* hash table stuff */
2352273d9f13SBarry Smith   b->ht           = 0;
2353273d9f13SBarry Smith   b->hd           = 0;
2354273d9f13SBarry Smith   b->ht_size      = 0;
2355273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2356273d9f13SBarry Smith   b->ht_fact      = 0;
2357273d9f13SBarry Smith   b->ht_total_ct  = 0;
2358273d9f13SBarry Smith   b->ht_insert_ct = 0;
2359273d9f13SBarry Smith 
2360b0a32e0cSBarry Smith   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2361273d9f13SBarry Smith   if (flg) {
2362f6275e2eSBarry Smith     PetscReal fact = 1.39;
2363273d9f13SBarry Smith     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
236487828ca2SBarry Smith     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2365273d9f13SBarry Smith     if (fact <= 1.0) fact = 1.39;
2366273d9f13SBarry Smith     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
236763ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2368273d9f13SBarry Smith   }
2369273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2370273d9f13SBarry Smith                                      "MatStoreValues_MPIBAIJ",
2371273d9f13SBarry Smith                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2372273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2373273d9f13SBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
2374273d9f13SBarry Smith                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2375273d9f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2376273d9f13SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
2377273d9f13SBarry Smith                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2378a23d5eceSKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2379a23d5eceSKris Buschelman                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2380a23d5eceSKris Buschelman                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2381aac34f13SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2382aac34f13SBarry Smith 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2383aac34f13SBarry Smith 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
238492b32695SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
238592b32695SKris Buschelman                                      "MatDiagonalScaleLocal_MPIBAIJ",
238692b32695SKris Buschelman                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
23875bf65638SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
23885bf65638SKris Buschelman                                      "MatSetHashTableFactor_MPIBAIJ",
23895bf65638SKris Buschelman                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2390273d9f13SBarry Smith   PetscFunctionReturn(0);
2391273d9f13SBarry Smith }
2392273d9f13SBarry Smith EXTERN_C_END
2393273d9f13SBarry Smith 
2394209238afSKris Buschelman /*MC
2395002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2396209238afSKris Buschelman 
2397209238afSKris Buschelman    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2398209238afSKris Buschelman    and MATMPIBAIJ otherwise.
2399209238afSKris Buschelman 
2400209238afSKris Buschelman    Options Database Keys:
2401209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2402209238afSKris Buschelman 
2403209238afSKris Buschelman   Level: beginner
2404209238afSKris Buschelman 
2405aac34f13SBarry Smith .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2406209238afSKris Buschelman M*/
2407209238afSKris Buschelman 
2408209238afSKris Buschelman EXTERN_C_BEGIN
2409209238afSKris Buschelman #undef __FUNCT__
2410209238afSKris Buschelman #define __FUNCT__ "MatCreate_BAIJ"
2411dfbe8321SBarry Smith PetscErrorCode MatCreate_BAIJ(Mat A)
2412dfbe8321SBarry Smith {
24136849ba73SBarry Smith   PetscErrorCode ierr;
2414b24ad042SBarry Smith   PetscMPIInt    size;
2415209238afSKris Buschelman 
2416209238afSKris Buschelman   PetscFunctionBegin;
2417209238afSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2418209238afSKris Buschelman   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2419209238afSKris Buschelman   if (size == 1) {
2420209238afSKris Buschelman     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2421209238afSKris Buschelman   } else {
2422209238afSKris Buschelman     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2423209238afSKris Buschelman   }
2424209238afSKris Buschelman   PetscFunctionReturn(0);
2425209238afSKris Buschelman }
2426209238afSKris Buschelman EXTERN_C_END
2427209238afSKris Buschelman 
24284a2ae208SSatish Balay #undef __FUNCT__
24294a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2430273d9f13SBarry Smith /*@C
2431aac34f13SBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2432273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2433273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2434273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2435273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2436273d9f13SBarry Smith 
2437273d9f13SBarry Smith    Collective on Mat
2438273d9f13SBarry Smith 
2439273d9f13SBarry Smith    Input Parameters:
2440273d9f13SBarry Smith +  A - the matrix
2441273d9f13SBarry Smith .  bs   - size of blockk
2442273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2443273d9f13SBarry Smith            submatrix  (same for all local rows)
2444273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
2445273d9f13SBarry Smith            of the in diagonal portion of the local (possibly different for each block
2446273d9f13SBarry Smith            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2447273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2448273d9f13SBarry Smith            submatrix (same for all local rows).
2449273d9f13SBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
2450273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
2451273d9f13SBarry Smith            each block row) or PETSC_NULL.
2452273d9f13SBarry Smith 
245349a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
2454273d9f13SBarry Smith 
2455273d9f13SBarry Smith    Options Database Keys:
2456273d9f13SBarry Smith .   -mat_no_unroll - uses code that does not unroll the loops in the
2457273d9f13SBarry Smith                      block calculations (much slower)
2458273d9f13SBarry Smith .   -mat_block_size - size of the blocks to use
2459273d9f13SBarry Smith 
2460273d9f13SBarry Smith    Notes:
2461273d9f13SBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2462273d9f13SBarry Smith    than it must be used on all processors that share the object for that argument.
2463273d9f13SBarry Smith 
2464273d9f13SBarry Smith    Storage Information:
2465273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2466273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2467273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2468273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2469273d9f13SBarry Smith 
2470273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2471273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
2472273d9f13SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2473273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2474273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2475273d9f13SBarry Smith 
2476273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2477273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2478273d9f13SBarry Smith 
2479273d9f13SBarry Smith .vb
2480273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2481273d9f13SBarry Smith           -------------------
2482273d9f13SBarry Smith    row 3  |  o o o d d d o o o o o o
2483273d9f13SBarry Smith    row 4  |  o o o d d d o o o o o o
2484273d9f13SBarry Smith    row 5  |  o o o d d d o o o o o o
2485273d9f13SBarry Smith           -------------------
2486273d9f13SBarry Smith .ve
2487273d9f13SBarry Smith 
2488273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2489273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2490273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2491273d9f13SBarry Smith    stored simply in the MATSEQBAIJ format for compressed row storage.
2492273d9f13SBarry Smith 
2493273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2494273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2495273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2496273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2497273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2498273d9f13SBarry Smith    matrices.
2499273d9f13SBarry Smith 
2500273d9f13SBarry Smith    Level: intermediate
2501273d9f13SBarry Smith 
2502273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel
2503273d9f13SBarry Smith 
2504aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2505273d9f13SBarry Smith @*/
2506b24ad042SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2507273d9f13SBarry Smith {
2508b24ad042SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2509273d9f13SBarry Smith 
2510273d9f13SBarry Smith   PetscFunctionBegin;
2511a23d5eceSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2512a23d5eceSKris Buschelman   if (f) {
2513a23d5eceSKris Buschelman     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2514273d9f13SBarry Smith   }
2515273d9f13SBarry Smith   PetscFunctionReturn(0);
2516273d9f13SBarry Smith }
2517273d9f13SBarry Smith 
25184a2ae208SSatish Balay #undef __FUNCT__
25194a2ae208SSatish Balay #define __FUNCT__ "MatCreateMPIBAIJ"
252079bdfe76SSatish Balay /*@C
252179bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
252279bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
252379bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
252479bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
252579bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
252679bdfe76SSatish Balay 
2527db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2528db81eaa0SLois Curfman McInnes 
252979bdfe76SSatish Balay    Input Parameters:
2530db81eaa0SLois Curfman McInnes +  comm - MPI communicator
253179bdfe76SSatish Balay .  bs   - size of blockk
253279bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
253392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253492e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
253592e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
253692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
253792e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2538be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2539be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
254047a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
254179bdfe76SSatish Balay            submatrix  (same for all local rows)
254247a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
254392e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2544db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
254547a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
254679bdfe76SSatish Balay            submatrix (same for all local rows).
254747a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
254892e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
254992e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
255079bdfe76SSatish Balay 
255179bdfe76SSatish Balay    Output Parameter:
255279bdfe76SSatish Balay .  A - the matrix
255379bdfe76SSatish Balay 
2554db81eaa0SLois Curfman McInnes    Options Database Keys:
2555db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2556db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2557db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
25583ffaccefSLois Curfman McInnes 
2559b259b22eSLois Curfman McInnes    Notes:
256049a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
256149a6f317SBarry Smith 
256247a75d0bSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
256347a75d0bSBarry Smith 
256479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
256579bdfe76SSatish Balay    (possibly both).
256679bdfe76SSatish Balay 
2567be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2568be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2569be79a94dSBarry Smith 
257079bdfe76SSatish Balay    Storage Information:
257179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
257279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
257379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
257479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
257579bdfe76SSatish Balay 
257679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
257779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
257879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
257979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
258079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
258179bdfe76SSatish Balay 
258279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
258379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
258479bdfe76SSatish Balay 
2585db81eaa0SLois Curfman McInnes .vb
2586db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2587db81eaa0SLois Curfman McInnes           -------------------
2588db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2589db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2590db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2591db81eaa0SLois Curfman McInnes           -------------------
2592db81eaa0SLois Curfman McInnes .ve
259379bdfe76SSatish Balay 
259479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
259579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
259679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
259757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
259879bdfe76SSatish Balay 
2599d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2600d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
260179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
260292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
260392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
26046da5968aSLois Curfman McInnes    matrices.
260579bdfe76SSatish Balay 
2606027ccd11SLois Curfman McInnes    Level: intermediate
2607027ccd11SLois Curfman McInnes 
260892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
260979bdfe76SSatish Balay 
2610aac34f13SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
261179bdfe76SSatish Balay @*/
2612b24ad042SBarry Smith PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
261379bdfe76SSatish Balay {
26146849ba73SBarry Smith   PetscErrorCode ierr;
2615b24ad042SBarry Smith   PetscMPIInt    size;
261679bdfe76SSatish Balay 
2617d64ed03dSBarry Smith   PetscFunctionBegin;
2618273d9f13SBarry Smith   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2619d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2620273d9f13SBarry Smith   if (size > 1) {
2621273d9f13SBarry Smith     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2622273d9f13SBarry Smith     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2623273d9f13SBarry Smith   } else {
2624273d9f13SBarry Smith     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2625273d9f13SBarry Smith     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
26263914022bSBarry Smith   }
26273a40ed3dSBarry Smith   PetscFunctionReturn(0);
262879bdfe76SSatish Balay }
2629026e39d0SSatish Balay 
26304a2ae208SSatish Balay #undef __FUNCT__
26314a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ"
26326849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
26330ac07820SSatish Balay {
26340ac07820SSatish Balay   Mat            mat;
26350ac07820SSatish Balay   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2636dfbe8321SBarry Smith   PetscErrorCode ierr;
2637b24ad042SBarry Smith   PetscInt       len=0;
26380ac07820SSatish Balay 
2639d64ed03dSBarry Smith   PetscFunctionBegin;
26400ac07820SSatish Balay   *newmat       = 0;
2641273d9f13SBarry Smith   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2642be5d1d56SKris Buschelman   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
26431d5dac46SHong Zhang   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
26447fff6886SHong Zhang 
26454beb1cfeSHong Zhang   mat->factor       = matin->factor;
2646273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
26470ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
26487fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
26497fff6886SHong Zhang 
2650273d9f13SBarry Smith   a      = (Mat_MPIBAIJ*)mat->data;
2651521d7252SBarry Smith   mat->bs  = matin->bs;
26520ac07820SSatish Balay   a->bs2   = oldmat->bs2;
26530ac07820SSatish Balay   a->mbs   = oldmat->mbs;
26540ac07820SSatish Balay   a->nbs   = oldmat->nbs;
26550ac07820SSatish Balay   a->Mbs   = oldmat->Mbs;
26560ac07820SSatish Balay   a->Nbs   = oldmat->Nbs;
26570ac07820SSatish Balay 
26580ac07820SSatish Balay   a->rstart       = oldmat->rstart;
26590ac07820SSatish Balay   a->rend         = oldmat->rend;
26600ac07820SSatish Balay   a->cstart       = oldmat->cstart;
26610ac07820SSatish Balay   a->cend         = oldmat->cend;
26620ac07820SSatish Balay   a->size         = oldmat->size;
26630ac07820SSatish Balay   a->rank         = oldmat->rank;
2664aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2665aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2666aef5e8e0SSatish Balay   a->rowindices   = 0;
26670ac07820SSatish Balay   a->rowvalues    = 0;
26680ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
266930793edcSSatish Balay   a->barray       = 0;
26703123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
26713123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
26723123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
26733123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
26740ac07820SSatish Balay 
2675133cdb44SSatish Balay   /* hash table stuff */
2676133cdb44SSatish Balay   a->ht           = 0;
2677133cdb44SSatish Balay   a->hd           = 0;
2678133cdb44SSatish Balay   a->ht_size      = 0;
2679133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
268025fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2681133cdb44SSatish Balay   a->ht_total_ct  = 0;
2682133cdb44SSatish Balay   a->ht_insert_ct = 0;
2683133cdb44SSatish Balay 
2684b24ad042SBarry Smith   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
26858798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2686521d7252SBarry Smith   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
26870ac07820SSatish Balay   if (oldmat->colmap) {
2688aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
26890f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
269048e59246SSatish Balay #else
2691b24ad042SBarry Smith   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
269252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2693b24ad042SBarry Smith   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
269448e59246SSatish Balay #endif
26950ac07820SSatish Balay   } else a->colmap = 0;
26964beb1cfeSHong Zhang 
26970ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2698b24ad042SBarry Smith     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
269952e6d16bSBarry Smith     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2700b24ad042SBarry Smith     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
27010ac07820SSatish Balay   } else a->garray = 0;
27020ac07820SSatish Balay 
27030ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
270452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
27050ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
270652e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
27077fff6886SHong Zhang 
27082e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
270952e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
27102e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
271152e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2712b0a32e0cSBarry Smith   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
27130ac07820SSatish Balay   *newmat = mat;
27144beb1cfeSHong Zhang 
27153a40ed3dSBarry Smith   PetscFunctionReturn(0);
27160ac07820SSatish Balay }
271757b952d6SSatish Balay 
2718e090d566SSatish Balay #include "petscsys.h"
271957b952d6SSatish Balay 
27204a2ae208SSatish Balay #undef __FUNCT__
27214a2ae208SSatish Balay #define __FUNCT__ "MatLoad_MPIBAIJ"
2722dfbe8321SBarry Smith PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
272357b952d6SSatish Balay {
272457b952d6SSatish Balay   Mat            A;
27256849ba73SBarry Smith   PetscErrorCode ierr;
2726b24ad042SBarry Smith   int            fd;
2727b24ad042SBarry Smith   PetscInt       i,nz,j,rstart,rend;
272887828ca2SBarry Smith   PetscScalar    *vals,*buf;
272957b952d6SSatish Balay   MPI_Comm       comm = ((PetscObject)viewer)->comm;
273057b952d6SSatish Balay   MPI_Status     status;
2731b24ad042SBarry Smith   PetscMPIInt    rank,size,maxnz;
2732b24ad042SBarry Smith   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2733dc231df0SBarry Smith   PetscInt       *locrowlens,*procsnz = 0,*browners;
2734167e7480SBarry Smith   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2735dc231df0SBarry Smith   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2736b24ad042SBarry Smith   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2737dc231df0SBarry Smith   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
273857b952d6SSatish Balay 
2739d64ed03dSBarry Smith   PetscFunctionBegin;
2740b0a32e0cSBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
274157b952d6SSatish Balay 
2742d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2743d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
274457b952d6SSatish Balay   if (!rank) {
2745b0a32e0cSBarry Smith     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2746e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2747552e946dSBarry Smith     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
27486c5fab8fSBarry Smith   }
2749d64ed03dSBarry Smith 
2750b24ad042SBarry Smith   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
275157b952d6SSatish Balay   M = header[1]; N = header[2];
275257b952d6SSatish Balay 
275329bbc08cSBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
275457b952d6SSatish Balay 
275557b952d6SSatish Balay   /*
275657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
275757b952d6SSatish Balay      divisible by the blocksize
275857b952d6SSatish Balay   */
275957b952d6SSatish Balay   Mbs        = M/bs;
2760dc231df0SBarry Smith   extra_rows = bs - M + bs*Mbs;
276157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
276257b952d6SSatish Balay   else                  Mbs++;
276357b952d6SSatish Balay   if (extra_rows && !rank) {
276463ba0a88SBarry Smith     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
276557b952d6SSatish Balay   }
2766537820f0SBarry Smith 
276757b952d6SSatish Balay   /* determine ownership of all rows */
276857b952d6SSatish Balay   mbs        = Mbs/size + ((Mbs % size) > rank);
276957b952d6SSatish Balay   m          = mbs*bs;
2770dc231df0SBarry Smith   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2771b24ad042SBarry Smith   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2772167e7480SBarry Smith 
2773167e7480SBarry Smith   /* process 0 needs enough room for process with most rows */
2774167e7480SBarry Smith   if (!rank) {
2775167e7480SBarry Smith     mmax = rowners[1];
2776167e7480SBarry Smith     for (i=2; i<size; i++) {
2777167e7480SBarry Smith       mmax = PetscMax(mmax,rowners[i]);
2778167e7480SBarry Smith     }
2779ca02efcfSSatish Balay     mmax*=bs;
2780167e7480SBarry Smith   } else mmax = m;
2781167e7480SBarry Smith 
278257b952d6SSatish Balay   rowners[0] = 0;
2783cee3aa6bSSatish Balay   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2784cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
278557b952d6SSatish Balay   rstart = rowners[rank];
278657b952d6SSatish Balay   rend   = rowners[rank+1];
278757b952d6SSatish Balay 
278857b952d6SSatish Balay   /* distribute row lengths to all processors */
278919c38ff2SBarry Smith   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
279057b952d6SSatish Balay   if (!rank) {
2791dc231df0SBarry Smith     mend = m;
2792dc231df0SBarry Smith     if (size == 1) mend = mend - extra_rows;
2793dc231df0SBarry Smith     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2794dc231df0SBarry Smith     for (j=mend; j<m; j++) locrowlens[j] = 1;
2795dc231df0SBarry Smith     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2796b24ad042SBarry Smith     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2797b24ad042SBarry Smith     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2798dc231df0SBarry Smith     for (j=0; j<m; j++) {
2799dc231df0SBarry Smith       procsnz[0] += locrowlens[j];
2800dc231df0SBarry Smith     }
2801dc231df0SBarry Smith     for (i=1; i<size; i++) {
2802dc231df0SBarry Smith       mend = browners[i+1] - browners[i];
2803dc231df0SBarry Smith       if (i == size-1) mend = mend - extra_rows;
2804dc231df0SBarry Smith       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2805dc231df0SBarry Smith       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2806dc231df0SBarry Smith       /* calculate the number of nonzeros on each processor */
2807dc231df0SBarry Smith       for (j=0; j<browners[i+1]-browners[i]; j++) {
280857b952d6SSatish Balay         procsnz[i] += rowlengths[j];
280957b952d6SSatish Balay       }
2810dc231df0SBarry Smith       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
281157b952d6SSatish Balay     }
2812606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2813dc231df0SBarry Smith   } else {
2814dc231df0SBarry Smith     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2815dc231df0SBarry Smith   }
281657b952d6SSatish Balay 
2817dc231df0SBarry Smith   if (!rank) {
281857b952d6SSatish Balay     /* determine max buffer needed and allocate it */
28198a8e0b3aSBarry Smith     maxnz = procsnz[0];
2820cdc0ba36SBarry Smith     for (i=1; i<size; i++) {
282157b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
282257b952d6SSatish Balay     }
2823b24ad042SBarry Smith     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
282457b952d6SSatish Balay 
282557b952d6SSatish Balay     /* read in my part of the matrix column indices  */
282657b952d6SSatish Balay     nz     = procsnz[0];
282719c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
282857b952d6SSatish Balay     mycols = ibuf;
2829cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2830e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2831cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2832cee3aa6bSSatish Balay 
283357b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
283457b952d6SSatish Balay     for (i=1; i<size-1; i++) {
283557b952d6SSatish Balay       nz   = procsnz[i];
2836e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2837b24ad042SBarry Smith       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
283857b952d6SSatish Balay     }
283957b952d6SSatish Balay     /* read in the stuff for the last proc */
284057b952d6SSatish Balay     if (size != 1) {
284157b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2842e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
284357b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2844b24ad042SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
284557b952d6SSatish Balay     }
2846606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2847d64ed03dSBarry Smith   } else {
284857b952d6SSatish Balay     /* determine buffer space needed for message */
284957b952d6SSatish Balay     nz = 0;
285057b952d6SSatish Balay     for (i=0; i<m; i++) {
285157b952d6SSatish Balay       nz += locrowlens[i];
285257b952d6SSatish Balay     }
285319c38ff2SBarry Smith     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
285457b952d6SSatish Balay     mycols = ibuf;
285557b952d6SSatish Balay     /* receive message of column indices*/
2856b24ad042SBarry Smith     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2857b24ad042SBarry Smith     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
285829bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
285957b952d6SSatish Balay   }
286057b952d6SSatish Balay 
286157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2862dc231df0SBarry Smith   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2863dc231df0SBarry Smith   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2864dc231df0SBarry Smith   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2865dc231df0SBarry Smith   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2866dc231df0SBarry Smith   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
286757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
286857b952d6SSatish Balay   for (i=0; i<mbs; i++) {
286957b952d6SSatish Balay     dcount  = 0;
287057b952d6SSatish Balay     odcount = 0;
287157b952d6SSatish Balay     for (j=0; j<bs; j++) {
287257b952d6SSatish Balay       kmax = locrowlens[rowcount];
287357b952d6SSatish Balay       for (k=0; k<kmax; k++) {
287457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
287557b952d6SSatish Balay         if (!mask[tmp]) {
287657b952d6SSatish Balay           mask[tmp] = 1;
287757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
287857b952d6SSatish Balay           else masked1[dcount++] = tmp;
287957b952d6SSatish Balay         }
288057b952d6SSatish Balay       }
288157b952d6SSatish Balay       rowcount++;
288257b952d6SSatish Balay     }
2883cee3aa6bSSatish Balay 
288457b952d6SSatish Balay     dlens[i]  = dcount;
288557b952d6SSatish Balay     odlens[i] = odcount;
2886cee3aa6bSSatish Balay 
288757b952d6SSatish Balay     /* zero out the mask elements we set */
288857b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
288957b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
289057b952d6SSatish Balay   }
2891cee3aa6bSSatish Balay 
289257b952d6SSatish Balay   /* create our matrix */
289378ae41b4SKris Buschelman   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
289478ae41b4SKris Buschelman   ierr = MatSetType(A,type);CHKERRQ(ierr)
289578ae41b4SKris Buschelman   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
289678ae41b4SKris Buschelman 
289778ae41b4SKris Buschelman   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2898dc231df0SBarry Smith   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
289957b952d6SSatish Balay 
290057b952d6SSatish Balay   if (!rank) {
290119c38ff2SBarry Smith     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
290257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
290357b952d6SSatish Balay     nz = procsnz[0];
290457b952d6SSatish Balay     vals = buf;
2905cee3aa6bSSatish Balay     mycols = ibuf;
2906cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2907e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2908cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2909537820f0SBarry Smith 
291057b952d6SSatish Balay     /* insert into matrix */
291157b952d6SSatish Balay     jj      = rstart*bs;
291257b952d6SSatish Balay     for (i=0; i<m; i++) {
2913dc231df0SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
291457b952d6SSatish Balay       mycols += locrowlens[i];
291557b952d6SSatish Balay       vals   += locrowlens[i];
291657b952d6SSatish Balay       jj++;
291757b952d6SSatish Balay     }
291857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
291957b952d6SSatish Balay     for (i=1; i<size-1; i++) {
292057b952d6SSatish Balay       nz   = procsnz[i];
292157b952d6SSatish Balay       vals = buf;
2922e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2923ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
292457b952d6SSatish Balay     }
292557b952d6SSatish Balay     /* the last proc */
292657b952d6SSatish Balay     if (size != 1){
292757b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2928cee3aa6bSSatish Balay       vals = buf;
2929e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
293057b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2931ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
293257b952d6SSatish Balay     }
2933606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2934d64ed03dSBarry Smith   } else {
293557b952d6SSatish Balay     /* receive numeric values */
293619c38ff2SBarry Smith     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
293757b952d6SSatish Balay 
293857b952d6SSatish Balay     /* receive message of values*/
293957b952d6SSatish Balay     vals   = buf;
2940cee3aa6bSSatish Balay     mycols = ibuf;
2941ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2942ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
294329bbc08cSBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
294457b952d6SSatish Balay 
294557b952d6SSatish Balay     /* insert into matrix */
294657b952d6SSatish Balay     jj      = rstart*bs;
2947cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
2948dc231df0SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
294957b952d6SSatish Balay       mycols += locrowlens[i];
295057b952d6SSatish Balay       vals   += locrowlens[i];
295157b952d6SSatish Balay       jj++;
295257b952d6SSatish Balay     }
295357b952d6SSatish Balay   }
2954606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2955606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2956606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2957dc231df0SBarry Smith   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2958dc231df0SBarry Smith   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2959dc231df0SBarry Smith   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
29606d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
29616d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
296278ae41b4SKris Buschelman 
296378ae41b4SKris Buschelman   *newmat = A;
29643a40ed3dSBarry Smith   PetscFunctionReturn(0);
296557b952d6SSatish Balay }
296657b952d6SSatish Balay 
29674a2ae208SSatish Balay #undef __FUNCT__
29684a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2969133cdb44SSatish Balay /*@
2970133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2971133cdb44SSatish Balay 
2972133cdb44SSatish Balay    Input Parameters:
2973133cdb44SSatish Balay .  mat  - the matrix
2974133cdb44SSatish Balay .  fact - factor
2975133cdb44SSatish Balay 
2976fee21e36SBarry Smith    Collective on Mat
2977fee21e36SBarry Smith 
29788c890885SBarry Smith    Level: advanced
29798c890885SBarry Smith 
2980133cdb44SSatish Balay   Notes:
2981133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2982133cdb44SSatish Balay 
2983133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2984133cdb44SSatish Balay 
2985133cdb44SSatish Balay .seealso: MatSetOption()
2986133cdb44SSatish Balay @*/
2987dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2988133cdb44SSatish Balay {
2989dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,PetscReal);
29905bf65638SKris Buschelman 
29915bf65638SKris Buschelman   PetscFunctionBegin;
29925bf65638SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
29935bf65638SKris Buschelman   if (f) {
29945bf65638SKris Buschelman     ierr = (*f)(mat,fact);CHKERRQ(ierr);
29955bf65638SKris Buschelman   }
29965bf65638SKris Buschelman   PetscFunctionReturn(0);
29975bf65638SKris Buschelman }
29985bf65638SKris Buschelman 
29995bf65638SKris Buschelman #undef __FUNCT__
30005bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3001dfbe8321SBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
30025bf65638SKris Buschelman {
300325fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3004133cdb44SSatish Balay 
3005133cdb44SSatish Balay   PetscFunctionBegin;
3006133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
3007133cdb44SSatish Balay   baij->ht_fact = fact;
3008133cdb44SSatish Balay   PetscFunctionReturn(0);
3009133cdb44SSatish Balay }
3010f2a5309cSSatish Balay 
30114a2ae208SSatish Balay #undef __FUNCT__
30124a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3013b24ad042SBarry Smith PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3014f2a5309cSSatish Balay {
3015f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3016f2a5309cSSatish Balay   PetscFunctionBegin;
3017f2a5309cSSatish Balay   *Ad     = a->A;
3018f2a5309cSSatish Balay   *Ao     = a->B;
3019195d93cdSBarry Smith   *colmap = a->garray;
3020f2a5309cSSatish Balay   PetscFunctionReturn(0);
3021f2a5309cSSatish Balay }
3022