xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6fa18ffdeca28abd5fb4af63b4e02374ebfe45be)
1*6fa18ffdSBarry Smith /*$Id: mpibaij.c,v 1.192 2000/04/12 04:23:40 bsmith Exp bsmith $*/
279bdfe76SSatish Balay 
377ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
579bdfe76SSatish Balay 
657b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
757b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
8d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
97b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
1693fea6afSBarry Smith extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
1793fea6afSBarry Smith 
1893fea6afSBarry Smith /*  UGLY, ugly, ugly
1993fea6afSBarry Smith    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
2093fea6afSBarry Smith    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
2193fea6afSBarry Smith    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
2293fea6afSBarry Smith    converts the entries into single precision and then calls ..._MatScalar() to put them
2393fea6afSBarry Smith    into the single precision data structures.
2493fea6afSBarry Smith */
2593fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
26*6fa18ffdSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
2793fea6afSBarry Smith extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
2854750694SBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29*6fa18ffdSBarry Smith extern int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30*6fa18ffdSBarry Smith extern int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
3193fea6afSBarry Smith #else
32*6fa18ffdSBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
3393fea6afSBarry Smith #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
3493fea6afSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35*6fa18ffdSBarry Smith #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36*6fa18ffdSBarry Smith #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
3793fea6afSBarry Smith #endif
383b2fbd54SBarry Smith 
397fc3c18eSBarry Smith EXTERN_C_BEGIN
407fc3c18eSBarry Smith #undef __FUNC__
41*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
427fc3c18eSBarry Smith int MatStoreValues_MPIBAIJ(Mat mat)
437fc3c18eSBarry Smith {
447fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
457fc3c18eSBarry Smith   int         ierr;
467fc3c18eSBarry Smith 
477fc3c18eSBarry Smith   PetscFunctionBegin;
487fc3c18eSBarry Smith   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
497fc3c18eSBarry Smith   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
507fc3c18eSBarry Smith   PetscFunctionReturn(0);
517fc3c18eSBarry Smith }
527fc3c18eSBarry Smith EXTERN_C_END
537fc3c18eSBarry Smith 
547fc3c18eSBarry Smith EXTERN_C_BEGIN
557fc3c18eSBarry Smith #undef __FUNC__
56*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
577fc3c18eSBarry Smith int MatRetrieveValues_MPIBAIJ(Mat mat)
587fc3c18eSBarry Smith {
597fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
607fc3c18eSBarry Smith   int         ierr;
617fc3c18eSBarry Smith 
627fc3c18eSBarry Smith   PetscFunctionBegin;
637fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
647fc3c18eSBarry Smith   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
657fc3c18eSBarry Smith   PetscFunctionReturn(0);
667fc3c18eSBarry Smith }
677fc3c18eSBarry Smith EXTERN_C_END
687fc3c18eSBarry Smith 
69537820f0SBarry Smith /*
70537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
7157b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
7257b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
7357b952d6SSatish Balay    length of colmap equals the global matrix length.
7457b952d6SSatish Balay */
755615d1e5SSatish Balay #undef __FUNC__
76*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private"
7757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
7857b952d6SSatish Balay {
7957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
8057b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
81dc2900e9SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
8257b952d6SSatish Balay 
83d64ed03dSBarry Smith   PetscFunctionBegin;
84aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
850f5bd95cSBarry Smith   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
8648e59246SSatish Balay   for (i=0; i<nbs; i++){
870f5bd95cSBarry Smith     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
8848e59246SSatish Balay   }
8948e59246SSatish Balay #else
90758f045eSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
9157b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
92549d3d68SSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
93928fc39bSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
9448e59246SSatish Balay #endif
953a40ed3dSBarry Smith   PetscFunctionReturn(0);
9657b952d6SSatish Balay }
9757b952d6SSatish Balay 
9880c1aa95SSatish Balay #define CHUNKSIZE  10
9980c1aa95SSatish Balay 
100f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
10180c1aa95SSatish Balay { \
10280c1aa95SSatish Balay  \
10380c1aa95SSatish Balay     brow = row/bs;  \
10480c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
105ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
10680c1aa95SSatish Balay       bcol = col/bs; \
10780c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
108ab26458aSBarry Smith       low = 0; high = nrow; \
109ab26458aSBarry Smith       while (high-low > 3) { \
110ab26458aSBarry Smith         t = (low+high)/2; \
111ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
112ab26458aSBarry Smith         else              low  = t; \
113ab26458aSBarry Smith       } \
114ab26458aSBarry Smith       for (_i=low; _i<high; _i++) { \
11580c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
11680c1aa95SSatish Balay         if (rp[_i] == bcol) { \
11780c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
118eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
119eada6651SSatish Balay           else                    *bap  = value;  \
120ac7a638eSSatish Balay           goto a_noinsert; \
12180c1aa95SSatish Balay         } \
12280c1aa95SSatish Balay       } \
12389280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
124a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
12580c1aa95SSatish Balay       if (nrow >= rmax) { \
12680c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
12780c1aa95SSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
1283eda8832SBarry Smith         MatScalar *new_a; \
12980c1aa95SSatish Balay  \
130a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
13189280ab3SLois Curfman McInnes  \
13280c1aa95SSatish Balay         /* malloc new storage space */ \
1333eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
1343eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
13580c1aa95SSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
13680c1aa95SSatish Balay         new_i   = new_j + new_nz; \
13780c1aa95SSatish Balay  \
13880c1aa95SSatish Balay         /* copy over old data into new slots */ \
13980c1aa95SSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
14080c1aa95SSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
141549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
14280c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
1433eda8832SBarry Smith         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
1443eda8832SBarry Smith         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
145549d3d68SSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
146549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
1473eda8832SBarry Smith                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
14880c1aa95SSatish Balay         /* free up old matrix storage */ \
149606d414cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
150606d414cSSatish Balay         if (!a->singlemalloc) { \
151606d414cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
152606d414cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
153606d414cSSatish Balay         } \
15480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
1557c922b88SBarry Smith         a->singlemalloc = PETSC_TRUE; \
15680c1aa95SSatish Balay  \
15780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
158ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
1593eda8832SBarry Smith         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
16080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
16180c1aa95SSatish Balay         a->reallocs++; \
16280c1aa95SSatish Balay         a->nz++; \
16380c1aa95SSatish Balay       } \
16480c1aa95SSatish Balay       N = nrow++ - 1;  \
16580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
16680c1aa95SSatish Balay       for (ii=N; ii>=_i; ii--) { \
16780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
1683eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
16980c1aa95SSatish Balay       } \
1703eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
17180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
17280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
173ac7a638eSSatish Balay       a_noinsert:; \
17480c1aa95SSatish Balay     ailen[brow] = nrow; \
17580c1aa95SSatish Balay }
17657b952d6SSatish Balay 
177ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
178ac7a638eSSatish Balay { \
179ac7a638eSSatish Balay     brow = row/bs;  \
180ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
181ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
182ac7a638eSSatish Balay       bcol = col/bs; \
183ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
184ac7a638eSSatish Balay       low = 0; high = nrow; \
185ac7a638eSSatish Balay       while (high-low > 3) { \
186ac7a638eSSatish Balay         t = (low+high)/2; \
187ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
188ac7a638eSSatish Balay         else              low  = t; \
189ac7a638eSSatish Balay       } \
190ac7a638eSSatish Balay       for (_i=low; _i<high; _i++) { \
191ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
192ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
193ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
194ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
195ac7a638eSSatish Balay           else                    *bap  = value;  \
196ac7a638eSSatish Balay           goto b_noinsert; \
197ac7a638eSSatish Balay         } \
198ac7a638eSSatish Balay       } \
19989280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
200a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
201ac7a638eSSatish Balay       if (nrow >= rmax) { \
202ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
20374c639caSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
2043eda8832SBarry Smith         MatScalar *new_a; \
205ac7a638eSSatish Balay  \
206a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
20789280ab3SLois Curfman McInnes  \
208ac7a638eSSatish Balay         /* malloc new storage space */ \
2093eda8832SBarry Smith         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
2103eda8832SBarry Smith         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
211ac7a638eSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
212ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
213ac7a638eSSatish Balay  \
214ac7a638eSSatish Balay         /* copy over old data into new slots */ \
215ac7a638eSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
21674c639caSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
217549d3d68SSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
218ac7a638eSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
2193eda8832SBarry Smith         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
2203eda8832SBarry Smith         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
2213eda8832SBarry Smith         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
222549d3d68SSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
2233eda8832SBarry Smith                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
224ac7a638eSSatish Balay         /* free up old matrix storage */ \
225606d414cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
226606d414cSSatish Balay         if (!b->singlemalloc) { \
227606d414cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
228606d414cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
229606d414cSSatish Balay         } \
23074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
2317c922b88SBarry Smith         b->singlemalloc = PETSC_TRUE; \
232ac7a638eSSatish Balay  \
233ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
234ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
2353eda8832SBarry Smith         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
23674c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
23774c639caSSatish Balay         b->reallocs++; \
23874c639caSSatish Balay         b->nz++; \
239ac7a638eSSatish Balay       } \
240ac7a638eSSatish Balay       N = nrow++ - 1;  \
241ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
242ac7a638eSSatish Balay       for (ii=N; ii>=_i; ii--) { \
243ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
2443eda8832SBarry Smith         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
245ac7a638eSSatish Balay       } \
2463eda8832SBarry Smith       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
247ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
248ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
249ac7a638eSSatish Balay       b_noinsert:; \
250ac7a638eSSatish Balay     bilen[brow] = nrow; \
251ac7a638eSSatish Balay }
252ac7a638eSSatish Balay 
25393fea6afSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2545615d1e5SSatish Balay #undef __FUNC__
255*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
256ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
25757b952d6SSatish Balay {
258*6fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
25993fea6afSBarry Smith   int         ierr,i,N = m*n;
260*6fa18ffdSBarry Smith   MatScalar   *vsingle;
26193fea6afSBarry Smith 
26293fea6afSBarry Smith   PetscFunctionBegin;
263*6fa18ffdSBarry Smith   if (N > b->setvalueslen) {
264*6fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
265*6fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
266*6fa18ffdSBarry Smith     b->setvalueslen  = N;
267*6fa18ffdSBarry Smith   }
268*6fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
269*6fa18ffdSBarry Smith 
27093fea6afSBarry Smith   for (i=0; i<N; i++) {
27193fea6afSBarry Smith     vsingle[i] = v[i];
27293fea6afSBarry Smith   }
27393fea6afSBarry Smith   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
27493fea6afSBarry Smith   PetscFunctionReturn(0);
27593fea6afSBarry Smith }
27693fea6afSBarry Smith 
27793fea6afSBarry Smith #undef __FUNC__
278*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
27993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
28093fea6afSBarry Smith {
281*6fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
282*6fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
283*6fa18ffdSBarry Smith   MatScalar   *vsingle;
28493fea6afSBarry Smith 
28593fea6afSBarry Smith   PetscFunctionBegin;
286*6fa18ffdSBarry Smith   if (N > b->setvalueslen) {
287*6fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
288*6fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
289*6fa18ffdSBarry Smith     b->setvalueslen  = N;
290*6fa18ffdSBarry Smith   }
291*6fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
29293fea6afSBarry Smith   for (i=0; i<N; i++) {
29393fea6afSBarry Smith     vsingle[i] = v[i];
29493fea6afSBarry Smith   }
29593fea6afSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
29693fea6afSBarry Smith   PetscFunctionReturn(0);
29793fea6afSBarry Smith }
298*6fa18ffdSBarry Smith 
299*6fa18ffdSBarry Smith #undef __FUNC__
300*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT"
301*6fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
302*6fa18ffdSBarry Smith {
303*6fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
304*6fa18ffdSBarry Smith   int         ierr,i,N = m*n;
305*6fa18ffdSBarry Smith   MatScalar   *vsingle;
306*6fa18ffdSBarry Smith 
307*6fa18ffdSBarry Smith   PetscFunctionBegin;
308*6fa18ffdSBarry Smith   if (N > b->setvalueslen) {
309*6fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
310*6fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
311*6fa18ffdSBarry Smith     b->setvalueslen  = N;
312*6fa18ffdSBarry Smith   }
313*6fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
314*6fa18ffdSBarry Smith   for (i=0; i<N; i++) {
315*6fa18ffdSBarry Smith     vsingle[i] = v[i];
316*6fa18ffdSBarry Smith   }
317*6fa18ffdSBarry Smith   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
318*6fa18ffdSBarry Smith   PetscFunctionReturn(0);
319*6fa18ffdSBarry Smith }
320*6fa18ffdSBarry Smith 
321*6fa18ffdSBarry Smith #undef __FUNC__
322*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
323*6fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
324*6fa18ffdSBarry Smith {
325*6fa18ffdSBarry Smith   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
326*6fa18ffdSBarry Smith   int         ierr,i,N = m*n*b->bs2;
327*6fa18ffdSBarry Smith   MatScalar   *vsingle;
328*6fa18ffdSBarry Smith 
329*6fa18ffdSBarry Smith   PetscFunctionBegin;
330*6fa18ffdSBarry Smith   if (N > b->setvalueslen) {
331*6fa18ffdSBarry Smith     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
332*6fa18ffdSBarry Smith     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
333*6fa18ffdSBarry Smith     b->setvalueslen  = N;
334*6fa18ffdSBarry Smith   }
335*6fa18ffdSBarry Smith   vsingle = b->setvaluescopy;
336*6fa18ffdSBarry Smith   for (i=0; i<N; i++) {
337*6fa18ffdSBarry Smith     vsingle[i] = v[i];
338*6fa18ffdSBarry Smith   }
339*6fa18ffdSBarry Smith   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
340*6fa18ffdSBarry Smith   PetscFunctionReturn(0);
341*6fa18ffdSBarry Smith }
34293fea6afSBarry Smith #endif
34393fea6afSBarry Smith 
34493fea6afSBarry Smith #undef __FUNC__
345*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
34693fea6afSBarry Smith int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
34793fea6afSBarry Smith {
34857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
34993fea6afSBarry Smith   MatScalar   value;
3504fa0d573SSatish Balay   int         ierr,i,j,row,col;
3514fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
3524fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
3534fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
35457b952d6SSatish Balay 
355eada6651SSatish Balay   /* Some Variables required in the macro */
35680c1aa95SSatish Balay   Mat         A = baij->A;
35780c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
358ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
3593eda8832SBarry Smith   MatScalar   *aa=a->a;
360ac7a638eSSatish Balay 
361ac7a638eSSatish Balay   Mat         B = baij->B;
362ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
363ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
3643eda8832SBarry Smith   MatScalar   *ba=b->a;
365ac7a638eSSatish Balay 
366ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
367ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
3683eda8832SBarry Smith   MatScalar   *ap,*bap;
36980c1aa95SSatish Balay 
370d64ed03dSBarry Smith   PetscFunctionBegin;
37157b952d6SSatish Balay   for (i=0; i<m; i++) {
3725ef9f2a5SBarry Smith     if (im[i] < 0) continue;
373aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
374a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
375639f9d9dSBarry Smith #endif
37657b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
37757b952d6SSatish Balay       row = im[i] - rstart_orig;
37857b952d6SSatish Balay       for (j=0; j<n; j++) {
37957b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
38057b952d6SSatish Balay           col = in[j] - cstart_orig;
38157b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
382f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
38380c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
38473959e64SBarry Smith         } else if (in[j] < 0) continue;
385aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
386a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
387639f9d9dSBarry Smith #endif
38857b952d6SSatish Balay         else {
38957b952d6SSatish Balay           if (mat->was_assembled) {
390905e6a2fSBarry Smith             if (!baij->colmap) {
391905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
392905e6a2fSBarry Smith             }
393aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3940f5bd95cSBarry Smith             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
395fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
39648e59246SSatish Balay #else
397905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
39848e59246SSatish Balay #endif
39957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
40057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
40157b952d6SSatish Balay               col =  in[j];
4029bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
4039bf004c3SSatish Balay               B = baij->B;
4049bf004c3SSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
4059bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
4069bf004c3SSatish Balay               ba=b->a;
40757b952d6SSatish Balay             }
408d64ed03dSBarry Smith           } else col = in[j];
40957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
411ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
41257b952d6SSatish Balay         }
41357b952d6SSatish Balay       }
414d64ed03dSBarry Smith     } else {
41590f02eecSBarry Smith       if (!baij->donotstash) {
416ff2fd236SBarry Smith         if (roworiented) {
417*6fa18ffdSBarry Smith           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
418ff2fd236SBarry Smith         } else {
419*6fa18ffdSBarry Smith           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
42057b952d6SSatish Balay         }
42157b952d6SSatish Balay       }
42257b952d6SSatish Balay     }
42390f02eecSBarry Smith   }
4243a40ed3dSBarry Smith   PetscFunctionReturn(0);
42557b952d6SSatish Balay }
42657b952d6SSatish Balay 
427ab26458aSBarry Smith #undef __FUNC__
428*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
42993fea6afSBarry Smith int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
430ab26458aSBarry Smith {
431ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
43293fea6afSBarry Smith   MatScalar   *value,*barray=baij->barray;
4337ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
434ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
435ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
436ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
437ab26458aSBarry Smith 
438b16ae2b1SBarry Smith   PetscFunctionBegin;
43930793edcSSatish Balay   if(!barray) {
44093fea6afSBarry Smith     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
44130793edcSSatish Balay   }
44230793edcSSatish Balay 
443ab26458aSBarry Smith   if (roworiented) {
444ab26458aSBarry Smith     stepval = (n-1)*bs;
445ab26458aSBarry Smith   } else {
446ab26458aSBarry Smith     stepval = (m-1)*bs;
447ab26458aSBarry Smith   }
448ab26458aSBarry Smith   for (i=0; i<m; i++) {
4495ef9f2a5SBarry Smith     if (im[i] < 0) continue;
450aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4515ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
452ab26458aSBarry Smith #endif
453ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
454ab26458aSBarry Smith       row = im[i] - rstart;
455ab26458aSBarry Smith       for (j=0; j<n; j++) {
45615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
45715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
45815b57d14SSatish Balay           barray = v + i*bs2;
45915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
46015b57d14SSatish Balay           barray = v + j*bs2;
46115b57d14SSatish Balay         } else { /* Here a copy is required */
462ab26458aSBarry Smith           if (roworiented) {
463ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
464ab26458aSBarry Smith           } else {
465ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
466abef11f7SSatish Balay           }
46747513183SBarry Smith           for (ii=0; ii<bs; ii++,value+=stepval) {
46847513183SBarry Smith             for (jj=0; jj<bs; jj++) {
46930793edcSSatish Balay               *barray++  = *value++;
47047513183SBarry Smith             }
47147513183SBarry Smith           }
47230793edcSSatish Balay           barray -=bs2;
47315b57d14SSatish Balay         }
474abef11f7SSatish Balay 
475abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
476abef11f7SSatish Balay           col  = in[j] - cstart;
47793fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
478ab26458aSBarry Smith         }
4795ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
480aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
4815ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
482ab26458aSBarry Smith #endif
483ab26458aSBarry Smith         else {
484ab26458aSBarry Smith           if (mat->was_assembled) {
485ab26458aSBarry Smith             if (!baij->colmap) {
486ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
487ab26458aSBarry Smith             }
488a5eb4965SSatish Balay 
489aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
490aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
491fa46199cSSatish Balay             { int data;
4920f5bd95cSBarry Smith               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4930f5bd95cSBarry Smith               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
494fa46199cSSatish Balay             }
49548e59246SSatish Balay #else
4960f5bd95cSBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
497a5eb4965SSatish Balay #endif
49848e59246SSatish Balay #endif
499aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
5000f5bd95cSBarry Smith 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
501fa46199cSSatish Balay             col  = (col - 1)/bs;
50248e59246SSatish Balay #else
503a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
50448e59246SSatish Balay #endif
505ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
506ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
507ab26458aSBarry Smith               col =  in[j];
508ab26458aSBarry Smith             }
509ab26458aSBarry Smith           }
510ab26458aSBarry Smith           else col = in[j];
51193fea6afSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
512ab26458aSBarry Smith         }
513ab26458aSBarry Smith       }
514d64ed03dSBarry Smith     } else {
515ab26458aSBarry Smith       if (!baij->donotstash) {
516ff2fd236SBarry Smith         if (roworiented) {
517*6fa18ffdSBarry Smith           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
518ff2fd236SBarry Smith         } else {
519*6fa18ffdSBarry Smith           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
520ff2fd236SBarry Smith         }
521abef11f7SSatish Balay       }
522ab26458aSBarry Smith     }
523ab26458aSBarry Smith   }
5243a40ed3dSBarry Smith   PetscFunctionReturn(0);
525ab26458aSBarry Smith }
526*6fa18ffdSBarry Smith 
5270bdbc534SSatish Balay #define HASH_KEY 0.6180339887
528c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
529*6fa18ffdSBarry Smith /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
530c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
5315615d1e5SSatish Balay #undef __FUNC__
532*6fa18ffdSBarry Smith #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
533*6fa18ffdSBarry Smith int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
5340bdbc534SSatish Balay {
5350bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
5360bdbc534SSatish Balay   int         ierr,i,j,row,col;
5370bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
538c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
539c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
540329f5518SBarry Smith   PetscReal   tmp;
5413eda8832SBarry Smith   MatScalar   **HD = baij->hd,value;
542aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5434a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5444a15367fSSatish Balay #endif
5450bdbc534SSatish Balay 
5460bdbc534SSatish Balay   PetscFunctionBegin;
5470bdbc534SSatish Balay 
5480bdbc534SSatish Balay   for (i=0; i<m; i++) {
549aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
5500bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5510bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5520bdbc534SSatish Balay #endif
5530bdbc534SSatish Balay       row = im[i];
554c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
5550bdbc534SSatish Balay       for (j=0; j<n; j++) {
5560bdbc534SSatish Balay         col = in[j];
557*6fa18ffdSBarry Smith         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5580bdbc534SSatish Balay         /* Look up into the Hash Table */
559c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
560c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5610bdbc534SSatish Balay 
562c2760754SSatish Balay 
563c2760754SSatish Balay         idx = h1;
564aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
565187ce0cbSSatish Balay         insert_ct++;
566187ce0cbSSatish Balay         total_ct++;
567187ce0cbSSatish Balay         if (HT[idx] != key) {
568187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
569187ce0cbSSatish Balay           if (idx == size) {
570187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
571187ce0cbSSatish Balay             if (idx == h1) {
572187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
573187ce0cbSSatish Balay             }
574187ce0cbSSatish Balay           }
575187ce0cbSSatish Balay         }
576187ce0cbSSatish Balay #else
577c2760754SSatish Balay         if (HT[idx] != key) {
578c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
579c2760754SSatish Balay           if (idx == size) {
580c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
581c2760754SSatish Balay             if (idx == h1) {
582c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
583c2760754SSatish Balay             }
584c2760754SSatish Balay           }
585c2760754SSatish Balay         }
586187ce0cbSSatish Balay #endif
587c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
588c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
589c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
5900bdbc534SSatish Balay       }
5910bdbc534SSatish Balay     } else {
5920bdbc534SSatish Balay       if (!baij->donotstash) {
593ff2fd236SBarry Smith         if (roworiented) {
5948798bf22SSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
595ff2fd236SBarry Smith         } else {
5968798bf22SSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5970bdbc534SSatish Balay         }
5980bdbc534SSatish Balay       }
5990bdbc534SSatish Balay     }
6000bdbc534SSatish Balay   }
601aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
602187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
603187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
604187ce0cbSSatish Balay #endif
6050bdbc534SSatish Balay   PetscFunctionReturn(0);
6060bdbc534SSatish Balay }
6070bdbc534SSatish Balay 
6080bdbc534SSatish Balay #undef __FUNC__
609*6fa18ffdSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
610*6fa18ffdSBarry Smith int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
6110bdbc534SSatish Balay {
6120bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
6138798bf22SSatish Balay   int         ierr,i,j,ii,jj,row,col;
6140bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
615b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
616c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
617329f5518SBarry Smith   PetscReal   tmp;
6183eda8832SBarry Smith   MatScalar   **HD = baij->hd,*baij_a;
619*6fa18ffdSBarry Smith   MatScalar   *v_t,*value;
620aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6214a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
6224a15367fSSatish Balay #endif
6230bdbc534SSatish Balay 
624d0a41580SSatish Balay   PetscFunctionBegin;
625d0a41580SSatish Balay 
6260bdbc534SSatish Balay   if (roworiented) {
6270bdbc534SSatish Balay     stepval = (n-1)*bs;
6280bdbc534SSatish Balay   } else {
6290bdbc534SSatish Balay     stepval = (m-1)*bs;
6300bdbc534SSatish Balay   }
6310bdbc534SSatish Balay   for (i=0; i<m; i++) {
632aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
6330bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
6340bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
6350bdbc534SSatish Balay #endif
6360bdbc534SSatish Balay     row   = im[i];
637187ce0cbSSatish Balay     v_t   = v + i*bs2;
638c2760754SSatish Balay     if (row >= rstart && row < rend) {
6390bdbc534SSatish Balay       for (j=0; j<n; j++) {
6400bdbc534SSatish Balay         col = in[j];
6410bdbc534SSatish Balay 
6420bdbc534SSatish Balay         /* Look up into the Hash Table */
643c2760754SSatish Balay         key = row*Nbs+col+1;
644c2760754SSatish Balay         h1  = HASH(size,key,tmp);
6450bdbc534SSatish Balay 
646c2760754SSatish Balay         idx = h1;
647aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
648187ce0cbSSatish Balay         total_ct++;
649187ce0cbSSatish Balay         insert_ct++;
650187ce0cbSSatish Balay        if (HT[idx] != key) {
651187ce0cbSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
652187ce0cbSSatish Balay           if (idx == size) {
653187ce0cbSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
654187ce0cbSSatish Balay             if (idx == h1) {
655187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
656187ce0cbSSatish Balay             }
657187ce0cbSSatish Balay           }
658187ce0cbSSatish Balay         }
659187ce0cbSSatish Balay #else
660c2760754SSatish Balay         if (HT[idx] != key) {
661c2760754SSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
662c2760754SSatish Balay           if (idx == size) {
663c2760754SSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
664c2760754SSatish Balay             if (idx == h1) {
665c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
666c2760754SSatish Balay             }
667c2760754SSatish Balay           }
668c2760754SSatish Balay         }
669187ce0cbSSatish Balay #endif
670c2760754SSatish Balay         baij_a = HD[idx];
6710bdbc534SSatish Balay         if (roworiented) {
672c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
673187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
674187ce0cbSSatish Balay           value = v_t;
675187ce0cbSSatish Balay           v_t  += bs;
676fef45726SSatish Balay           if (addv == ADD_VALUES) {
677c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
678c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
679fef45726SSatish Balay                 baij_a[jj]  += *value++;
680b4cc0f5aSSatish Balay               }
681b4cc0f5aSSatish Balay             }
682fef45726SSatish Balay           } else {
683c2760754SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
684c2760754SSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
685fef45726SSatish Balay                 baij_a[jj]  = *value++;
686fef45726SSatish Balay               }
687fef45726SSatish Balay             }
688fef45726SSatish Balay           }
6890bdbc534SSatish Balay         } else {
6900bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
691fef45726SSatish Balay           if (addv == ADD_VALUES) {
692b4cc0f5aSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
6930bdbc534SSatish Balay               for (jj=0; jj<bs; jj++) {
694fef45726SSatish Balay                 baij_a[jj]  += *value++;
695fef45726SSatish Balay               }
696fef45726SSatish Balay             }
697fef45726SSatish Balay           } else {
698fef45726SSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
699fef45726SSatish Balay               for (jj=0; jj<bs; jj++) {
700fef45726SSatish Balay                 baij_a[jj]  = *value++;
701fef45726SSatish Balay               }
702b4cc0f5aSSatish Balay             }
7030bdbc534SSatish Balay           }
7040bdbc534SSatish Balay         }
7050bdbc534SSatish Balay       }
7060bdbc534SSatish Balay     } else {
7070bdbc534SSatish Balay       if (!baij->donotstash) {
7080bdbc534SSatish Balay         if (roworiented) {
7098798bf22SSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7100bdbc534SSatish Balay         } else {
7118798bf22SSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
7120bdbc534SSatish Balay         }
7130bdbc534SSatish Balay       }
7140bdbc534SSatish Balay     }
7150bdbc534SSatish Balay   }
716aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
717187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
718187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
719187ce0cbSSatish Balay #endif
7200bdbc534SSatish Balay   PetscFunctionReturn(0);
7210bdbc534SSatish Balay }
722133cdb44SSatish Balay 
7230bdbc534SSatish Balay #undef __FUNC__
724b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
725ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
726d6de1c52SSatish Balay {
727d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
728d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
72948e59246SSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
730d6de1c52SSatish Balay 
731133cdb44SSatish Balay   PetscFunctionBegin;
732d6de1c52SSatish Balay   for (i=0; i<m; i++) {
733a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
734a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
735d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
736d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
737d6de1c52SSatish Balay       for (j=0; j<n; j++) {
738a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
739a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
740d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
741d6de1c52SSatish Balay           col = idxn[j] - bscstart;
74298dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
743d64ed03dSBarry Smith         } else {
744905e6a2fSBarry Smith           if (!baij->colmap) {
745905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
746905e6a2fSBarry Smith           }
747aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
7480f5bd95cSBarry Smith           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
749fa46199cSSatish Balay           data --;
75048e59246SSatish Balay #else
75148e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
75248e59246SSatish Balay #endif
75348e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
754d9d09a02SSatish Balay           else {
75548e59246SSatish Balay             col  = data + idxn[j]%bs;
75698dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
757d6de1c52SSatish Balay           }
758d6de1c52SSatish Balay         }
759d6de1c52SSatish Balay       }
760d64ed03dSBarry Smith     } else {
761a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
762d6de1c52SSatish Balay     }
763d6de1c52SSatish Balay   }
7643a40ed3dSBarry Smith  PetscFunctionReturn(0);
765d6de1c52SSatish Balay }
766d6de1c52SSatish Balay 
7675615d1e5SSatish Balay #undef __FUNC__
768b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
769329f5518SBarry Smith int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
770d6de1c52SSatish Balay {
771d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
772d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
773acdf5bf4SSatish Balay   int        ierr,i,bs2=baij->bs2;
774329f5518SBarry Smith   PetscReal  sum = 0.0;
7753eda8832SBarry Smith   MatScalar  *v;
776d6de1c52SSatish Balay 
777d64ed03dSBarry Smith   PetscFunctionBegin;
778d6de1c52SSatish Balay   if (baij->size == 1) {
779d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
780d6de1c52SSatish Balay   } else {
781d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
782d6de1c52SSatish Balay       v = amat->a;
783d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++) {
784aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
785329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
786d6de1c52SSatish Balay #else
787d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
788d6de1c52SSatish Balay #endif
789d6de1c52SSatish Balay       }
790d6de1c52SSatish Balay       v = bmat->a;
791d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++) {
792aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
793329f5518SBarry Smith         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
794d6de1c52SSatish Balay #else
795d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
796d6de1c52SSatish Balay #endif
797d6de1c52SSatish Balay       }
798ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
799d6de1c52SSatish Balay       *norm = sqrt(*norm);
800d64ed03dSBarry Smith     } else {
801e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
802d6de1c52SSatish Balay     }
803d64ed03dSBarry Smith   }
8043a40ed3dSBarry Smith   PetscFunctionReturn(0);
805d6de1c52SSatish Balay }
80657b952d6SSatish Balay 
807bd7f49f5SSatish Balay 
808fef45726SSatish Balay /*
809fef45726SSatish Balay   Creates the hash table, and sets the table
810fef45726SSatish Balay   This table is created only once.
811fef45726SSatish Balay   If new entried need to be added to the matrix
812fef45726SSatish Balay   then the hash table has to be destroyed and
813fef45726SSatish Balay   recreated.
814fef45726SSatish Balay */
815fef45726SSatish Balay #undef __FUNC__
816b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
817329f5518SBarry Smith int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
818596b8d2eSBarry Smith {
819596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
820596b8d2eSBarry Smith   Mat         A = baij->A,B=baij->B;
821596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
8220bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
823549d3d68SSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
824187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
825fef45726SSatish Balay   int         *HT,key;
8263eda8832SBarry Smith   MatScalar   **HD;
827329f5518SBarry Smith   PetscReal   tmp;
828aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
8294a15367fSSatish Balay   int         ct=0,max=0;
8304a15367fSSatish Balay #endif
831fef45726SSatish Balay 
832d64ed03dSBarry Smith   PetscFunctionBegin;
8330bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8340bdbc534SSatish Balay   size = baij->ht_size;
835fef45726SSatish Balay 
8360bdbc534SSatish Balay   if (baij->ht) {
8370bdbc534SSatish Balay     PetscFunctionReturn(0);
838596b8d2eSBarry Smith   }
8390bdbc534SSatish Balay 
840fef45726SSatish Balay   /* Allocate Memory for Hash Table */
8413eda8832SBarry Smith   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
842b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
843b9e4cc15SSatish Balay   HD = baij->hd;
844a07cd24cSSatish Balay   HT = baij->ht;
845b9e4cc15SSatish Balay 
846b9e4cc15SSatish Balay 
847549d3d68SSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
8480bdbc534SSatish Balay 
849596b8d2eSBarry Smith 
850596b8d2eSBarry Smith   /* Loop Over A */
8510bdbc534SSatish Balay   for (i=0; i<a->mbs; i++) {
852596b8d2eSBarry Smith     for (j=ai[i]; j<ai[i+1]; j++) {
8530bdbc534SSatish Balay       row = i+rstart;
8540bdbc534SSatish Balay       col = aj[j]+cstart;
855596b8d2eSBarry Smith 
856187ce0cbSSatish Balay       key = row*Nbs + col + 1;
857c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8580bdbc534SSatish Balay       for (k=0; k<size; k++){
8590bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8600bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8610bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
862596b8d2eSBarry Smith           break;
863aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
864187ce0cbSSatish Balay         } else {
865187ce0cbSSatish Balay           ct++;
866187ce0cbSSatish Balay #endif
867596b8d2eSBarry Smith         }
868187ce0cbSSatish Balay       }
869aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
870187ce0cbSSatish Balay       if (k> max) max = k;
871187ce0cbSSatish Balay #endif
872596b8d2eSBarry Smith     }
873596b8d2eSBarry Smith   }
874596b8d2eSBarry Smith   /* Loop Over B */
8750bdbc534SSatish Balay   for (i=0; i<b->mbs; i++) {
876596b8d2eSBarry Smith     for (j=bi[i]; j<bi[i+1]; j++) {
8770bdbc534SSatish Balay       row = i+rstart;
8780bdbc534SSatish Balay       col = garray[bj[j]];
879187ce0cbSSatish Balay       key = row*Nbs + col + 1;
880c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8810bdbc534SSatish Balay       for (k=0; k<size; k++){
8820bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8830bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8840bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
885596b8d2eSBarry Smith           break;
886aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
887187ce0cbSSatish Balay         } else {
888187ce0cbSSatish Balay           ct++;
889187ce0cbSSatish Balay #endif
890596b8d2eSBarry Smith         }
891187ce0cbSSatish Balay       }
892aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
893187ce0cbSSatish Balay       if (k> max) max = k;
894187ce0cbSSatish Balay #endif
895596b8d2eSBarry Smith     }
896596b8d2eSBarry Smith   }
897596b8d2eSBarry Smith 
898596b8d2eSBarry Smith   /* Print Summary */
899aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
900c38d4ed2SBarry Smith   for (i=0,j=0; i<size; i++) {
901596b8d2eSBarry Smith     if (HT[i]) {j++;}
902c38d4ed2SBarry Smith   }
903329f5518SBarry Smith   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
904187ce0cbSSatish Balay #endif
9053a40ed3dSBarry Smith   PetscFunctionReturn(0);
906596b8d2eSBarry Smith }
90757b952d6SSatish Balay 
908bbb85fb3SSatish Balay #undef __FUNC__
909b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
910bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
911bbb85fb3SSatish Balay {
912bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
913ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
914bbb85fb3SSatish Balay   InsertMode  addv;
915bbb85fb3SSatish Balay 
916bbb85fb3SSatish Balay   PetscFunctionBegin;
917bbb85fb3SSatish Balay   if (baij->donotstash) {
918bbb85fb3SSatish Balay     PetscFunctionReturn(0);
919bbb85fb3SSatish Balay   }
920bbb85fb3SSatish Balay 
921bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
922bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
923bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
924bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
925bbb85fb3SSatish Balay   }
926bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
927bbb85fb3SSatish Balay 
9288798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
9298798bf22SSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
9308798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
9315a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
9328798bf22SSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
9335a655dc6SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
934bbb85fb3SSatish Balay   PetscFunctionReturn(0);
935bbb85fb3SSatish Balay }
936bbb85fb3SSatish Balay 
937bbb85fb3SSatish Balay #undef __FUNC__
938b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
939bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
940bbb85fb3SSatish Balay {
941bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
942a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
943a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
9447c922b88SBarry Smith   int         *row,*col,other_disassembled;
9457c922b88SBarry Smith   PetscTruth  r1,r2,r3;
9463eda8832SBarry Smith   MatScalar   *val;
947bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
948bbb85fb3SSatish Balay 
949bbb85fb3SSatish Balay   PetscFunctionBegin;
950bbb85fb3SSatish Balay   if (!baij->donotstash) {
951a2d1c673SSatish Balay     while (1) {
9528798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
953a2d1c673SSatish Balay       if (!flg) break;
954a2d1c673SSatish Balay 
955bbb85fb3SSatish Balay       for (i=0; i<n;) {
956bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
957bbb85fb3SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
958bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
959bbb85fb3SSatish Balay         else       ncols = n-i;
960bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
96193fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
962bbb85fb3SSatish Balay         i = j;
963bbb85fb3SSatish Balay       }
964bbb85fb3SSatish Balay     }
9658798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
966a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
967a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
968a2d1c673SSatish Balay        restore the original flags */
969a2d1c673SSatish Balay     r1 = baij->roworiented;
970a2d1c673SSatish Balay     r2 = a->roworiented;
971a2d1c673SSatish Balay     r3 = b->roworiented;
9727c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
9737c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
9747c922b88SBarry Smith     b->roworiented    = PETSC_FALSE;
975a2d1c673SSatish Balay     while (1) {
9768798bf22SSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
977a2d1c673SSatish Balay       if (!flg) break;
978a2d1c673SSatish Balay 
979a2d1c673SSatish Balay       for (i=0; i<n;) {
980a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
981a2d1c673SSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
982a2d1c673SSatish Balay         if (j < n) ncols = j-i;
983a2d1c673SSatish Balay         else       ncols = n-i;
98493fea6afSBarry Smith         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
985a2d1c673SSatish Balay         i = j;
986a2d1c673SSatish Balay       }
987a2d1c673SSatish Balay     }
9888798bf22SSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
989a2d1c673SSatish Balay     baij->roworiented = r1;
990a2d1c673SSatish Balay     a->roworiented    = r2;
991a2d1c673SSatish Balay     b->roworiented    = r3;
992bbb85fb3SSatish Balay   }
993bbb85fb3SSatish Balay 
994bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
995bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
996bbb85fb3SSatish Balay 
997bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
998bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
999bbb85fb3SSatish Balay   /*
1000bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1001bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1002bbb85fb3SSatish Balay   */
1003bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1004bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1005bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1006bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1007bbb85fb3SSatish Balay     }
1008bbb85fb3SSatish Balay   }
1009bbb85fb3SSatish Balay 
1010bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1011bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1012bbb85fb3SSatish Balay   }
1013bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1014bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1015bbb85fb3SSatish Balay 
1016aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g)
1017bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1018c38d4ed2SBarry Smith     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1019bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1020bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1021bbb85fb3SSatish Balay   }
1022bbb85fb3SSatish Balay #endif
1023bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1024bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1025bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1026bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1027bbb85fb3SSatish Balay   }
1028bbb85fb3SSatish Balay 
1029606d414cSSatish Balay   if (baij->rowvalues) {
1030606d414cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1031606d414cSSatish Balay     baij->rowvalues = 0;
1032606d414cSSatish Balay   }
1033bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1034bbb85fb3SSatish Balay }
103557b952d6SSatish Balay 
10365615d1e5SSatish Balay #undef __FUNC__
1037b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
10387b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
103957b952d6SSatish Balay {
104057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
104177ed5343SBarry Smith   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
10426831982aSBarry Smith   PetscTruth   isascii,isdraw;
104357b952d6SSatish Balay 
1044d64ed03dSBarry Smith   PetscFunctionBegin;
10456831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
10466831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
10470f5bd95cSBarry Smith   if (isascii) {
1048d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1049639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
10504e220ebcSLois Curfman McInnes       MatInfo info;
1051d132466eSBarry Smith       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1052d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
10536831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
10544e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10556831982aSBarry Smith               baij->bs,(int)info.memory);CHKERRQ(ierr);
1056d132466eSBarry Smith       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
10576831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1058d132466eSBarry Smith       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
10596831982aSBarry Smith       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
10606831982aSBarry Smith       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
106157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
10623a40ed3dSBarry Smith       PetscFunctionReturn(0);
1063d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
10646831982aSBarry Smith       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
10653a40ed3dSBarry Smith       PetscFunctionReturn(0);
106657b952d6SSatish Balay     }
106757b952d6SSatish Balay   }
106857b952d6SSatish Balay 
10690f5bd95cSBarry Smith   if (isdraw) {
107057b952d6SSatish Balay     Draw       draw;
107157b952d6SSatish Balay     PetscTruth isnull;
107277ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
10733a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
107457b952d6SSatish Balay   }
107557b952d6SSatish Balay 
107657b952d6SSatish Balay   if (size == 1) {
107757b952d6SSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1078d64ed03dSBarry Smith   } else {
107957b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
108057b952d6SSatish Balay     Mat         A;
108157b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
10823eda8832SBarry Smith     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
10833eda8832SBarry Smith     MatScalar   *a;
108457b952d6SSatish Balay 
108557b952d6SSatish Balay     if (!rank) {
108655843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1087d64ed03dSBarry Smith     } else {
108855843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
108957b952d6SSatish Balay     }
109057b952d6SSatish Balay     PLogObjectParent(mat,A);
109157b952d6SSatish Balay 
109257b952d6SSatish Balay     /* copy over the A part */
109357b952d6SSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
109457b952d6SSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
109557b952d6SSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
109657b952d6SSatish Balay 
109757b952d6SSatish Balay     for (i=0; i<mbs; i++) {
109857b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
109957b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
110057b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
110157b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
110257b952d6SSatish Balay         for (k=0; k<bs; k++) {
110393fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1104cee3aa6bSSatish Balay           col++; a += bs;
110557b952d6SSatish Balay         }
110657b952d6SSatish Balay       }
110757b952d6SSatish Balay     }
110857b952d6SSatish Balay     /* copy over the B part */
110957b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
111057b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
111157b952d6SSatish Balay     for (i=0; i<mbs; i++) {
111257b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
111357b952d6SSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
111457b952d6SSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
111557b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
111657b952d6SSatish Balay         for (k=0; k<bs; k++) {
111793fea6afSBarry Smith           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1118cee3aa6bSSatish Balay           col++; a += bs;
111957b952d6SSatish Balay         }
112057b952d6SSatish Balay       }
112157b952d6SSatish Balay     }
1122606d414cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
11236d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
11246d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112555843e3eSBarry Smith     /*
112655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
112755843e3eSBarry Smith        synchronized across all processors that share the Draw object
112855843e3eSBarry Smith     */
11296831982aSBarry Smith     if (!rank) {
11306831982aSBarry Smith       Viewer sviewer;
11316831982aSBarry Smith       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
11326831982aSBarry Smith       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
11336831982aSBarry Smith       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
113457b952d6SSatish Balay     }
113557b952d6SSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
113657b952d6SSatish Balay   }
11373a40ed3dSBarry Smith   PetscFunctionReturn(0);
113857b952d6SSatish Balay }
113957b952d6SSatish Balay 
11405615d1e5SSatish Balay #undef __FUNC__
1141b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1142e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
114357b952d6SSatish Balay {
114457b952d6SSatish Balay   int        ierr;
11456831982aSBarry Smith   PetscTruth isascii,isdraw,issocket,isbinary;
114657b952d6SSatish Balay 
1147d64ed03dSBarry Smith   PetscFunctionBegin;
11486831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
11496831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
11506831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
11516831982aSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
11520f5bd95cSBarry Smith   if (isascii || isdraw || issocket || isbinary) {
11537b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
11545cd90555SBarry Smith   } else {
11550f5bd95cSBarry Smith     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
115657b952d6SSatish Balay   }
11573a40ed3dSBarry Smith   PetscFunctionReturn(0);
115857b952d6SSatish Balay }
115957b952d6SSatish Balay 
11605615d1e5SSatish Balay #undef __FUNC__
1161b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1162e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
116379bdfe76SSatish Balay {
116479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
116579bdfe76SSatish Balay   int         ierr;
116679bdfe76SSatish Balay 
1167d64ed03dSBarry Smith   PetscFunctionBegin;
116898dd23e9SBarry Smith 
116998dd23e9SBarry Smith   if (mat->mapping) {
117098dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
117198dd23e9SBarry Smith   }
117298dd23e9SBarry Smith   if (mat->bmapping) {
117398dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
117498dd23e9SBarry Smith   }
117561b13de0SBarry Smith   if (mat->rmap) {
117661b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
117761b13de0SBarry Smith   }
117861b13de0SBarry Smith   if (mat->cmap) {
117961b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
118061b13de0SBarry Smith   }
1181aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1182e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
118379bdfe76SSatish Balay #endif
118479bdfe76SSatish Balay 
11858798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
11868798bf22SSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
11878798bf22SSatish Balay 
1188606d414cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
118979bdfe76SSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
119079bdfe76SSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1191aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
11920f5bd95cSBarry Smith   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
119348e59246SSatish Balay #else
1194606d414cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
119548e59246SSatish Balay #endif
1196606d414cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1197606d414cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1198606d414cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1199606d414cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1200606d414cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1201606d414cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1202*6fa18ffdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
1203*6fa18ffdSBarry Smith   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1204*6fa18ffdSBarry Smith #endif
1205606d414cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
120679bdfe76SSatish Balay   PLogObjectDestroy(mat);
120779bdfe76SSatish Balay   PetscHeaderDestroy(mat);
12083a40ed3dSBarry Smith   PetscFunctionReturn(0);
120979bdfe76SSatish Balay }
121079bdfe76SSatish Balay 
12115615d1e5SSatish Balay #undef __FUNC__
1212b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1213ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1214cee3aa6bSSatish Balay {
1215cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
121647b4a8eaSLois Curfman McInnes   int         ierr,nt;
1217cee3aa6bSSatish Balay 
1218d64ed03dSBarry Smith   PetscFunctionBegin;
1219e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
122047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1221a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
122247b4a8eaSLois Curfman McInnes   }
1223e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
122447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1225a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
122647b4a8eaSLois Curfman McInnes   }
122743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1228f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
122943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1230f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
123143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12323a40ed3dSBarry Smith   PetscFunctionReturn(0);
1233cee3aa6bSSatish Balay }
1234cee3aa6bSSatish Balay 
12355615d1e5SSatish Balay #undef __FUNC__
1236b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1237ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1238cee3aa6bSSatish Balay {
1239cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1240cee3aa6bSSatish Balay   int        ierr;
1241d64ed03dSBarry Smith 
1242d64ed03dSBarry Smith   PetscFunctionBegin;
124343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1244f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
124543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1246f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
12473a40ed3dSBarry Smith   PetscFunctionReturn(0);
1248cee3aa6bSSatish Balay }
1249cee3aa6bSSatish Balay 
12505615d1e5SSatish Balay #undef __FUNC__
1251b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
12527c922b88SBarry Smith int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1253cee3aa6bSSatish Balay {
1254cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1255cee3aa6bSSatish Balay   int         ierr;
1256cee3aa6bSSatish Balay 
1257d64ed03dSBarry Smith   PetscFunctionBegin;
1258cee3aa6bSSatish Balay   /* do nondiagonal part */
12597c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1260cee3aa6bSSatish Balay   /* send it on its way */
1261537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1262cee3aa6bSSatish Balay   /* do local part */
12637c922b88SBarry Smith   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1264cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1265cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1266cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1267639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1269cee3aa6bSSatish Balay }
1270cee3aa6bSSatish Balay 
12715615d1e5SSatish Balay #undef __FUNC__
1272b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
12737c922b88SBarry Smith int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1274cee3aa6bSSatish Balay {
1275cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1276cee3aa6bSSatish Balay   int         ierr;
1277cee3aa6bSSatish Balay 
1278d64ed03dSBarry Smith   PetscFunctionBegin;
1279cee3aa6bSSatish Balay   /* do nondiagonal part */
12807c922b88SBarry Smith   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281cee3aa6bSSatish Balay   /* send it on its way */
1282537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1283cee3aa6bSSatish Balay   /* do local part */
12847c922b88SBarry Smith   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1285cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1286cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1287cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1288537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12893a40ed3dSBarry Smith   PetscFunctionReturn(0);
1290cee3aa6bSSatish Balay }
1291cee3aa6bSSatish Balay 
1292cee3aa6bSSatish Balay /*
1293cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1294cee3aa6bSSatish Balay    diagonal block
1295cee3aa6bSSatish Balay */
12965615d1e5SSatish Balay #undef __FUNC__
1297b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1298ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1299cee3aa6bSSatish Balay {
1300cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
13013a40ed3dSBarry Smith   int         ierr;
1302d64ed03dSBarry Smith 
1303d64ed03dSBarry Smith   PetscFunctionBegin;
1304a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
13053a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13063a40ed3dSBarry Smith   PetscFunctionReturn(0);
1307cee3aa6bSSatish Balay }
1308cee3aa6bSSatish Balay 
13095615d1e5SSatish Balay #undef __FUNC__
1310b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1311ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1312cee3aa6bSSatish Balay {
1313cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1314cee3aa6bSSatish Balay   int         ierr;
1315d64ed03dSBarry Smith 
1316d64ed03dSBarry Smith   PetscFunctionBegin;
1317cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1318cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
13193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1320cee3aa6bSSatish Balay }
1321026e39d0SSatish Balay 
13225615d1e5SSatish Balay #undef __FUNC__
1323b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1324ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
132557b952d6SSatish Balay {
132657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1327d64ed03dSBarry Smith 
1328d64ed03dSBarry Smith   PetscFunctionBegin;
1329bd7f49f5SSatish Balay   if (m) *m = mat->M;
1330bd7f49f5SSatish Balay   if (n) *n = mat->N;
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
133257b952d6SSatish Balay }
133357b952d6SSatish Balay 
13345615d1e5SSatish Balay #undef __FUNC__
1335b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1336ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
133757b952d6SSatish Balay {
133857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1339d64ed03dSBarry Smith 
1340d64ed03dSBarry Smith   PetscFunctionBegin;
1341f830108cSBarry Smith   *m = mat->m; *n = mat->n;
13423a40ed3dSBarry Smith   PetscFunctionReturn(0);
134357b952d6SSatish Balay }
134457b952d6SSatish Balay 
13455615d1e5SSatish Balay #undef __FUNC__
1346b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1347ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
134857b952d6SSatish Balay {
134957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1350d64ed03dSBarry Smith 
1351d64ed03dSBarry Smith   PetscFunctionBegin;
1352a21fb8cbSBarry Smith   if (m) *m = mat->rstart*mat->bs;
1353a21fb8cbSBarry Smith   if (n) *n = mat->rend*mat->bs;
13543a40ed3dSBarry Smith   PetscFunctionReturn(0);
135557b952d6SSatish Balay }
135657b952d6SSatish Balay 
13575615d1e5SSatish Balay #undef __FUNC__
1358b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1359acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1360acdf5bf4SSatish Balay {
1361acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1362acdf5bf4SSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1363acdf5bf4SSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1364d9d09a02SSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1365d9d09a02SSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1366acdf5bf4SSatish Balay 
1367d64ed03dSBarry Smith   PetscFunctionBegin;
1368a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1369acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1370acdf5bf4SSatish Balay 
1371acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1372acdf5bf4SSatish Balay     /*
1373acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1374acdf5bf4SSatish Balay     */
1375acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1376bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1377bd16c2feSSatish Balay     for (i=0; i<mbs; i++) {
1378acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1379acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1380acdf5bf4SSatish Balay     }
1381549d3d68SSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1382acdf5bf4SSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1383acdf5bf4SSatish Balay   }
1384acdf5bf4SSatish Balay 
1385a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1386d9d09a02SSatish Balay   lrow = row - brstart;
1387acdf5bf4SSatish Balay 
1388acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1389acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1390acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1391f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1392f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1393acdf5bf4SSatish Balay   nztot = nzA + nzB;
1394acdf5bf4SSatish Balay 
1395acdf5bf4SSatish Balay   cmap  = mat->garray;
1396acdf5bf4SSatish Balay   if (v  || idx) {
1397acdf5bf4SSatish Balay     if (nztot) {
1398acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1399acdf5bf4SSatish Balay       int imark = -1;
1400acdf5bf4SSatish Balay       if (v) {
1401acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1402acdf5bf4SSatish Balay         for (i=0; i<nzB; i++) {
1403d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1404acdf5bf4SSatish Balay           else break;
1405acdf5bf4SSatish Balay         }
1406acdf5bf4SSatish Balay         imark = i;
1407acdf5bf4SSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1408acdf5bf4SSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1409acdf5bf4SSatish Balay       }
1410acdf5bf4SSatish Balay       if (idx) {
1411acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1412acdf5bf4SSatish Balay         if (imark > -1) {
1413acdf5bf4SSatish Balay           for (i=0; i<imark; i++) {
1414bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1415acdf5bf4SSatish Balay           }
1416acdf5bf4SSatish Balay         } else {
1417acdf5bf4SSatish Balay           for (i=0; i<nzB; i++) {
1418d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1419d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1420acdf5bf4SSatish Balay             else break;
1421acdf5bf4SSatish Balay           }
1422acdf5bf4SSatish Balay           imark = i;
1423acdf5bf4SSatish Balay         }
1424d9d09a02SSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1425d9d09a02SSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1426acdf5bf4SSatish Balay       }
1427d64ed03dSBarry Smith     } else {
1428d212a18eSSatish Balay       if (idx) *idx = 0;
1429d212a18eSSatish Balay       if (v)   *v   = 0;
1430d212a18eSSatish Balay     }
1431acdf5bf4SSatish Balay   }
1432acdf5bf4SSatish Balay   *nz = nztot;
1433f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1434f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
14353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1436acdf5bf4SSatish Balay }
1437acdf5bf4SSatish Balay 
14385615d1e5SSatish Balay #undef __FUNC__
1439b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1440acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1441acdf5bf4SSatish Balay {
1442acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1443d64ed03dSBarry Smith 
1444d64ed03dSBarry Smith   PetscFunctionBegin;
1445acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1446a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1447acdf5bf4SSatish Balay   }
1448acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14493a40ed3dSBarry Smith   PetscFunctionReturn(0);
1450acdf5bf4SSatish Balay }
1451acdf5bf4SSatish Balay 
14525615d1e5SSatish Balay #undef __FUNC__
1453b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1454ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14555a838052SSatish Balay {
14565a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1457d64ed03dSBarry Smith 
1458d64ed03dSBarry Smith   PetscFunctionBegin;
14595a838052SSatish Balay   *bs = baij->bs;
14603a40ed3dSBarry Smith   PetscFunctionReturn(0);
14615a838052SSatish Balay }
14625a838052SSatish Balay 
14635615d1e5SSatish Balay #undef __FUNC__
1464b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1465ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
146658667388SSatish Balay {
146758667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
146858667388SSatish Balay   int         ierr;
1469d64ed03dSBarry Smith 
1470d64ed03dSBarry Smith   PetscFunctionBegin;
147158667388SSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
147258667388SSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
14733a40ed3dSBarry Smith   PetscFunctionReturn(0);
147458667388SSatish Balay }
14750ac07820SSatish Balay 
14765615d1e5SSatish Balay #undef __FUNC__
1477b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1478ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14790ac07820SSatish Balay {
14804e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
14814e220ebcSLois Curfman McInnes   Mat         A = a->A,B = a->B;
14827d57db60SLois Curfman McInnes   int         ierr;
1483329f5518SBarry Smith   PetscReal   isend[5],irecv[5];
14840ac07820SSatish Balay 
1485d64ed03dSBarry Smith   PetscFunctionBegin;
14864e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14874e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
14880e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1489de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14904e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
14910e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1492de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14930ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14944e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14954e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14964e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14974e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14984e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14990ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1500f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
15014e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15024e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15034e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15044e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15054e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15060ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1507f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
15084e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15094e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15104e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15114e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15124e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1513d41123aaSBarry Smith   } else {
1514d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
15150ac07820SSatish Balay   }
15165a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
15175a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
15185a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
15195a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
15204e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
15214e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
15224e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
15233a40ed3dSBarry Smith   PetscFunctionReturn(0);
15240ac07820SSatish Balay }
15250ac07820SSatish Balay 
15265615d1e5SSatish Balay #undef __FUNC__
1527b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1528ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
152958667388SSatish Balay {
153058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
153198305bb5SBarry Smith   int         ierr;
153258667388SSatish Balay 
1533d64ed03dSBarry Smith   PetscFunctionBegin;
153458667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
153558667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
15366da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1537c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
15384787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
15397c922b88SBarry Smith       op == MAT_KEEP_ZEROED_ROWS ||
15404787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
154198305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154298305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1543b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
15447c922b88SBarry Smith         a->roworiented = PETSC_TRUE;
154598305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
154698305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1547b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
15486da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
154958667388SSatish Balay              op == MAT_SYMMETRIC ||
155058667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1551b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
155298305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
155358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
155498305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
15557c922b88SBarry Smith     a->roworiented = PETSC_FALSE;
155698305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
155798305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
15582b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
15597c922b88SBarry Smith     a->donotstash = PETSC_TRUE;
1560d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1561d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1562133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
15637c922b88SBarry Smith     a->ht_flag = PETSC_TRUE;
1564d64ed03dSBarry Smith   } else {
1565d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1566d64ed03dSBarry Smith   }
15673a40ed3dSBarry Smith   PetscFunctionReturn(0);
156858667388SSatish Balay }
156958667388SSatish Balay 
15705615d1e5SSatish Balay #undef __FUNC__
1571b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1572ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15730ac07820SSatish Balay {
15740ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
15750ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15760ac07820SSatish Balay   Mat         B;
157740011551SBarry Smith   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15780ac07820SSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
15793eda8832SBarry Smith   MatScalar   *a;
15800ac07820SSatish Balay 
1581d64ed03dSBarry Smith   PetscFunctionBegin;
15827c922b88SBarry Smith   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15837c922b88SBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
15840ac07820SSatish Balay 
15850ac07820SSatish Balay   /* copy over the A part */
15860ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
15870ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15880ac07820SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
15890ac07820SSatish Balay 
15900ac07820SSatish Balay   for (i=0; i<mbs; i++) {
15910ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15920ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
15930ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
15940ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15950ac07820SSatish Balay       for (k=0; k<bs; k++) {
159693fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15970ac07820SSatish Balay         col++; a += bs;
15980ac07820SSatish Balay       }
15990ac07820SSatish Balay     }
16000ac07820SSatish Balay   }
16010ac07820SSatish Balay   /* copy over the B part */
16020ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
16030ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16040ac07820SSatish Balay   for (i=0; i<mbs; i++) {
16050ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16060ac07820SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
16070ac07820SSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
16080ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16090ac07820SSatish Balay       for (k=0; k<bs; k++) {
161093fea6afSBarry Smith         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16110ac07820SSatish Balay         col++; a += bs;
16120ac07820SSatish Balay       }
16130ac07820SSatish Balay     }
16140ac07820SSatish Balay   }
1615606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
16160ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16170ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16180ac07820SSatish Balay 
16197c922b88SBarry Smith   if (matout) {
16200ac07820SSatish Balay     *matout = B;
16210ac07820SSatish Balay   } else {
1622f830108cSBarry Smith     PetscOps *Abops;
1623cc2dc46cSBarry Smith     MatOps   Aops;
1624f830108cSBarry Smith 
16250ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1626606d414cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
16270ac07820SSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
16280ac07820SSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1629aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
16300f5bd95cSBarry Smith     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1631b1fc9764SSatish Balay #else
1632606d414cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1633b1fc9764SSatish Balay #endif
1634606d414cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1635606d414cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1636606d414cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1637606d414cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1638f830108cSBarry Smith 
1639f830108cSBarry Smith     /*
1640f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1641f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1642f830108cSBarry Smith       else from C.
1643f830108cSBarry Smith     */
1644f830108cSBarry Smith     Abops   = A->bops;
1645f830108cSBarry Smith     Aops    = A->ops;
1646549d3d68SSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1647f830108cSBarry Smith     A->bops = Abops;
1648f830108cSBarry Smith     A->ops  = Aops;
1649f830108cSBarry Smith 
16500ac07820SSatish Balay     PetscHeaderDestroy(B);
16510ac07820SSatish Balay   }
16523a40ed3dSBarry Smith   PetscFunctionReturn(0);
16530ac07820SSatish Balay }
16540e95ebc0SSatish Balay 
16555615d1e5SSatish Balay #undef __FUNC__
1656b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
165736c4a09eSSatish Balay int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
16580e95ebc0SSatish Balay {
165936c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
166036c4a09eSSatish Balay   Mat         a = baij->A,b = baij->B;
16610e95ebc0SSatish Balay   int         ierr,s1,s2,s3;
16620e95ebc0SSatish Balay 
1663d64ed03dSBarry Smith   PetscFunctionBegin;
166436c4a09eSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
166536c4a09eSSatish Balay   if (rr) {
166636c4a09eSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
166736c4a09eSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
166836c4a09eSSatish Balay     /* Overlap communication with computation. */
166936c4a09eSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
167036c4a09eSSatish Balay   }
16710e95ebc0SSatish Balay   if (ll) {
16720e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
167336c4a09eSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1674a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
16750e95ebc0SSatish Balay   }
167636c4a09eSSatish Balay   /* scale  the diagonal block */
167736c4a09eSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
167836c4a09eSSatish Balay 
167936c4a09eSSatish Balay   if (rr) {
168036c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
168136c4a09eSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682a21fb8cbSBarry Smith     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
168336c4a09eSSatish Balay   }
168436c4a09eSSatish Balay 
16853a40ed3dSBarry Smith   PetscFunctionReturn(0);
16860e95ebc0SSatish Balay }
16870e95ebc0SSatish Balay 
16885615d1e5SSatish Balay #undef __FUNC__
1689b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1690ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16910ac07820SSatish Balay {
16920ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
16930ac07820SSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1694a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16950ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16960ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1697a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16980ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16990ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17000ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17010ac07820SSatish Balay   IS             istmp;
17020ac07820SSatish Balay 
1703d64ed03dSBarry Smith   PetscFunctionBegin;
17040ac07820SSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
17050ac07820SSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
17060ac07820SSatish Balay 
17070ac07820SSatish Balay   /*  first count number of contributors to each processor */
17080ac07820SSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1709549d3d68SSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1710549d3d68SSatish Balay   procs  = nprocs + size;
17110ac07820SSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
17120ac07820SSatish Balay   for (i=0; i<N; i++) {
17130ac07820SSatish Balay     idx   = rows[i];
17140ac07820SSatish Balay     found = 0;
17150ac07820SSatish Balay     for (j=0; j<size; j++) {
17160ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17170ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
17180ac07820SSatish Balay       }
17190ac07820SSatish Balay     }
1720a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
17210ac07820SSatish Balay   }
17220ac07820SSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
17230ac07820SSatish Balay 
17240ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
17256831982aSBarry Smith   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
17266831982aSBarry Smith   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
17270ac07820SSatish Balay   nmax   = work[rank];
17286831982aSBarry Smith   nrecvs = work[size+rank];
1729606d414cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
17300ac07820SSatish Balay 
17310ac07820SSatish Balay   /* post receives:   */
1732d64ed03dSBarry Smith   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1733d64ed03dSBarry Smith   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
17340ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
1735ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
17360ac07820SSatish Balay   }
17370ac07820SSatish Balay 
17380ac07820SSatish Balay   /* do sends:
17390ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17400ac07820SSatish Balay      the ith processor
17410ac07820SSatish Balay   */
17420ac07820SSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1743ca161407SBarry Smith   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
17440ac07820SSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
17450ac07820SSatish Balay   starts[0]  = 0;
17460ac07820SSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17470ac07820SSatish Balay   for (i=0; i<N; i++) {
17480ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17490ac07820SSatish Balay   }
17506831982aSBarry Smith   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
17510ac07820SSatish Balay 
17520ac07820SSatish Balay   starts[0] = 0;
17530ac07820SSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
17540ac07820SSatish Balay   count = 0;
17550ac07820SSatish Balay   for (i=0; i<size; i++) {
17560ac07820SSatish Balay     if (procs[i]) {
1757ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17580ac07820SSatish Balay     }
17590ac07820SSatish Balay   }
1760606d414cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
17610ac07820SSatish Balay 
17620ac07820SSatish Balay   base = owners[rank]*bs;
17630ac07820SSatish Balay 
17640ac07820SSatish Balay   /*  wait on receives */
17650ac07820SSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
17660ac07820SSatish Balay   source = lens + nrecvs;
17670ac07820SSatish Balay   count  = nrecvs; slen = 0;
17680ac07820SSatish Balay   while (count) {
1769ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17700ac07820SSatish Balay     /* unpack receives into our local space */
1771ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17720ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17730ac07820SSatish Balay     lens[imdex]    = n;
17740ac07820SSatish Balay     slen          += n;
17750ac07820SSatish Balay     count--;
17760ac07820SSatish Balay   }
1777606d414cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
17780ac07820SSatish Balay 
17790ac07820SSatish Balay   /* move the data into the send scatter */
17800ac07820SSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
17810ac07820SSatish Balay   count = 0;
17820ac07820SSatish Balay   for (i=0; i<nrecvs; i++) {
17830ac07820SSatish Balay     values = rvalues + i*nmax;
17840ac07820SSatish Balay     for (j=0; j<lens[i]; j++) {
17850ac07820SSatish Balay       lrows[count++] = values[j] - base;
17860ac07820SSatish Balay     }
17870ac07820SSatish Balay   }
1788606d414cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1789606d414cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1790606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1791606d414cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
17920ac07820SSatish Balay 
17930ac07820SSatish Balay   /* actually zap the local rows */
1794029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17950ac07820SSatish Balay   PLogObjectParent(A,istmp);
1796a07cd24cSSatish Balay 
179772dacd9aSBarry Smith   /*
179872dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
179972dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
180072dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
180172dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
180272dacd9aSBarry Smith 
180372dacd9aSBarry Smith        Contributed by: Mathew Knepley
180472dacd9aSBarry Smith   */
18059c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1806*6fa18ffdSBarry Smith   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
18079c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1808*6fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
18099c957beeSSatish Balay   } else if (diag) {
1810*6fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1811fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1812fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1813fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18146525c446SSatish Balay     }
1815a07cd24cSSatish Balay     for (i = 0; i < slen; i++) {
1816a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
18173eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1818a07cd24cSSatish Balay     }
1819a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18219c957beeSSatish Balay   } else {
1822*6fa18ffdSBarry Smith     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1823a07cd24cSSatish Balay   }
18249c957beeSSatish Balay 
18259c957beeSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1826606d414cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1827a07cd24cSSatish Balay 
18280ac07820SSatish Balay   /* wait on sends */
18290ac07820SSatish Balay   if (nsends) {
1830d64ed03dSBarry Smith     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1831ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1832606d414cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
18330ac07820SSatish Balay   }
1834606d414cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1835606d414cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
18360ac07820SSatish Balay 
18373a40ed3dSBarry Smith   PetscFunctionReturn(0);
18380ac07820SSatish Balay }
183972dacd9aSBarry Smith 
18405615d1e5SSatish Balay #undef __FUNC__
1841b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1842ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1843ba4ca20aSSatish Balay {
1844ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
184525fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1846133cdb44SSatish Balay   static int  called = 0;
18473a40ed3dSBarry Smith   int         ierr;
1848ba4ca20aSSatish Balay 
1849d64ed03dSBarry Smith   PetscFunctionBegin;
18503a40ed3dSBarry Smith   if (!a->rank) {
18513a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
185225fdafccSSatish Balay   }
185325fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1854d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1855d132466eSBarry Smith   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
18563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1857ba4ca20aSSatish Balay }
18580ac07820SSatish Balay 
18595615d1e5SSatish Balay #undef __FUNC__
1860b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1861ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1862bb5a7306SBarry Smith {
1863bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1864bb5a7306SBarry Smith   int         ierr;
1865d64ed03dSBarry Smith 
1866d64ed03dSBarry Smith   PetscFunctionBegin;
1867bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
18683a40ed3dSBarry Smith   PetscFunctionReturn(0);
1869bb5a7306SBarry Smith }
1870bb5a7306SBarry Smith 
18712e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18720ac07820SSatish Balay 
18737fc3c18eSBarry Smith #undef __FUNC__
1874b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
18757fc3c18eSBarry Smith int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
18767fc3c18eSBarry Smith {
18777fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
18787fc3c18eSBarry Smith   Mat         a,b,c,d;
18797fc3c18eSBarry Smith   PetscTruth  flg;
18807fc3c18eSBarry Smith   int         ierr;
18817fc3c18eSBarry Smith 
18827fc3c18eSBarry Smith   PetscFunctionBegin;
18837fc3c18eSBarry Smith   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
18847fc3c18eSBarry Smith   a = matA->A; b = matA->B;
18857fc3c18eSBarry Smith   c = matB->A; d = matB->B;
18867fc3c18eSBarry Smith 
18877fc3c18eSBarry Smith   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
18887fc3c18eSBarry Smith   if (flg == PETSC_TRUE) {
18897fc3c18eSBarry Smith     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
18907fc3c18eSBarry Smith   }
18917fc3c18eSBarry Smith   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
18927fc3c18eSBarry Smith   PetscFunctionReturn(0);
18937fc3c18eSBarry Smith }
18947fc3c18eSBarry Smith 
189579bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1896cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1897cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1898cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1899cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1900cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1901cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
19027c922b88SBarry Smith   MatMultTranspose_MPIBAIJ,
19037c922b88SBarry Smith   MatMultTransposeAdd_MPIBAIJ,
1904cc2dc46cSBarry Smith   0,
1905cc2dc46cSBarry Smith   0,
1906cc2dc46cSBarry Smith   0,
1907cc2dc46cSBarry Smith   0,
1908cc2dc46cSBarry Smith   0,
1909cc2dc46cSBarry Smith   0,
1910cc2dc46cSBarry Smith   0,
1911cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1912cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
19137fc3c18eSBarry Smith   MatEqual_MPIBAIJ,
1914cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1915cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1916cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1917cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1918cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1919cc2dc46cSBarry Smith   0,
1920cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1921cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1922cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1923cc2dc46cSBarry Smith   0,
1924cc2dc46cSBarry Smith   0,
1925cc2dc46cSBarry Smith   0,
1926cc2dc46cSBarry Smith   0,
1927cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1928cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1929cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1930cc2dc46cSBarry Smith   0,
1931cc2dc46cSBarry Smith   0,
1932cc2dc46cSBarry Smith   0,
1933cc2dc46cSBarry Smith   0,
19342e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1935cc2dc46cSBarry Smith   0,
1936cc2dc46cSBarry Smith   0,
1937cc2dc46cSBarry Smith   0,
1938cc2dc46cSBarry Smith   0,
1939cc2dc46cSBarry Smith   0,
1940cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1941cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1942cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1943cc2dc46cSBarry Smith   0,
1944cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1945cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1946cc2dc46cSBarry Smith   0,
1947cc2dc46cSBarry Smith   0,
1948cc2dc46cSBarry Smith   0,
1949cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1950cc2dc46cSBarry Smith   0,
1951cc2dc46cSBarry Smith   0,
1952cc2dc46cSBarry Smith   0,
1953cc2dc46cSBarry Smith   0,
1954cc2dc46cSBarry Smith   0,
1955cc2dc46cSBarry Smith   0,
1956cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1957cc2dc46cSBarry Smith   0,
1958cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1959cc2dc46cSBarry Smith   0,
1960cc2dc46cSBarry Smith   0,
1961cc2dc46cSBarry Smith   0,
1962cc2dc46cSBarry Smith   MatGetMaps_Petsc};
196379bdfe76SSatish Balay 
19645ef9f2a5SBarry Smith 
1965e18c124aSSatish Balay EXTERN_C_BEGIN
19665ef9f2a5SBarry Smith #undef __FUNC__
1967b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
19685ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19695ef9f2a5SBarry Smith {
19705ef9f2a5SBarry Smith   PetscFunctionBegin;
19715ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19725ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19735ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19745ef9f2a5SBarry Smith }
1975e18c124aSSatish Balay EXTERN_C_END
197679bdfe76SSatish Balay 
19775615d1e5SSatish Balay #undef __FUNC__
1978b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
197979bdfe76SSatish Balay /*@C
198079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
198179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
198279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
198379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
198479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
198579bdfe76SSatish Balay 
1986db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1987db81eaa0SLois Curfman McInnes 
198879bdfe76SSatish Balay    Input Parameters:
1989db81eaa0SLois Curfman McInnes +  comm - MPI communicator
199079bdfe76SSatish Balay .  bs   - size of blockk
199179bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
199292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
199392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
199492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
199592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
199692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1997be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1998be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
199979bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
200079bdfe76SSatish Balay            submatrix  (same for all local rows)
20017fc3c18eSBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
200292e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2003db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
200492e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
200579bdfe76SSatish Balay            submatrix (same for all local rows).
20067fc3c18eSBarry Smith -  o_nnz - array containing the number of nonzeros in the various block rows of the
200792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
200892e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
200979bdfe76SSatish Balay 
201079bdfe76SSatish Balay    Output Parameter:
201179bdfe76SSatish Balay .  A - the matrix
201279bdfe76SSatish Balay 
2013db81eaa0SLois Curfman McInnes    Options Database Keys:
2014db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2015db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2016db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
2017494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
2018494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
20193ffaccefSLois Curfman McInnes 
2020b259b22eSLois Curfman McInnes    Notes:
202179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
202279bdfe76SSatish Balay    (possibly both).
202379bdfe76SSatish Balay 
2024be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2025be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2026be79a94dSBarry Smith 
202779bdfe76SSatish Balay    Storage Information:
202879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
202979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
203079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
203179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
203279bdfe76SSatish Balay 
203379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
203479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
203579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
203679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
203779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
203879bdfe76SSatish Balay 
203979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
204079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
204179bdfe76SSatish Balay 
2042db81eaa0SLois Curfman McInnes .vb
2043db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2044db81eaa0SLois Curfman McInnes           -------------------
2045db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2046db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2047db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2048db81eaa0SLois Curfman McInnes           -------------------
2049db81eaa0SLois Curfman McInnes .ve
205079bdfe76SSatish Balay 
205179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
205279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
205379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
205457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
205579bdfe76SSatish Balay 
2056d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2057d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
205879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
205992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
206092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
20616da5968aSLois Curfman McInnes    matrices.
206279bdfe76SSatish Balay 
2063027ccd11SLois Curfman McInnes    Level: intermediate
2064027ccd11SLois Curfman McInnes 
206592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
206679bdfe76SSatish Balay 
20673eda8832SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
206879bdfe76SSatish Balay @*/
2069329f5518SBarry Smith int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
207079bdfe76SSatish Balay {
207179bdfe76SSatish Balay   Mat          B;
207279bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2073f1af5d2fSBarry Smith   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2074f1af5d2fSBarry Smith   PetscTruth   flag1,flag2,flg;
207579bdfe76SSatish Balay 
2076d64ed03dSBarry Smith   PetscFunctionBegin;
2077962fb4a0SBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2078962fb4a0SBarry Smith 
2079a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
208036c4a09eSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
208136c4a09eSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
20824fdb0a08SBarry Smith   if (d_nnz) {
208336c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20844fdb0a08SBarry Smith       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
20854fdb0a08SBarry Smith     }
20864fdb0a08SBarry Smith   }
20874fdb0a08SBarry Smith   if (o_nnz) {
208836c4a09eSSatish Balay     for (i=0; i<m/bs; i++) {
20894fdb0a08SBarry Smith       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
20904fdb0a08SBarry Smith     }
20914fdb0a08SBarry Smith   }
20923914022bSBarry Smith 
2093d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2094494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2095494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2096494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20973914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20983914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20993914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
21003a40ed3dSBarry Smith     PetscFunctionReturn(0);
21013914022bSBarry Smith   }
21023914022bSBarry Smith 
210379bdfe76SSatish Balay   *A = 0;
21043f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
210579bdfe76SSatish Balay   PLogObjectCreate(B);
210679bdfe76SSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2107549d3d68SSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2108549d3d68SSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
21094c50302cSBarry Smith 
2110e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2111e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
211290f02eecSBarry Smith   B->mapping    = 0;
211379bdfe76SSatish Balay   B->factor     = 0;
211479bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
211579bdfe76SSatish Balay 
2116e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
2117d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2118d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
211979bdfe76SSatish Balay 
21207c922b88SBarry Smith   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2121a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2122d64ed03dSBarry Smith   }
2123a8c6a408SBarry Smith   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2124a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2125a8c6a408SBarry Smith   }
2126a8c6a408SBarry Smith   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2127a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2128a8c6a408SBarry Smith   }
2129cee3aa6bSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2130cee3aa6bSSatish Balay   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
213179bdfe76SSatish Balay 
213279bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
213379bdfe76SSatish Balay     work[0] = m; work[1] = n;
213479bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2135ca161407SBarry Smith     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
213679bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
213779bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
213879bdfe76SSatish Balay   }
213979bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
214079bdfe76SSatish Balay     Mbs = M/bs;
2141a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
214279bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
214379bdfe76SSatish Balay     m   = mbs*bs;
214479bdfe76SSatish Balay   }
214579bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
214679bdfe76SSatish Balay     Nbs = N/bs;
2147a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
214879bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
214979bdfe76SSatish Balay     n   = nbs*bs;
215079bdfe76SSatish Balay   }
2151a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2152a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2153a8c6a408SBarry Smith   }
215479bdfe76SSatish Balay 
215579bdfe76SSatish Balay   b->m = m; B->m = m;
215679bdfe76SSatish Balay   b->n = n; B->n = n;
215779bdfe76SSatish Balay   b->N = N; B->N = N;
215879bdfe76SSatish Balay   b->M = M; B->M = M;
215979bdfe76SSatish Balay   b->bs  = bs;
216079bdfe76SSatish Balay   b->bs2 = bs*bs;
216179bdfe76SSatish Balay   b->mbs = mbs;
216279bdfe76SSatish Balay   b->nbs = nbs;
216379bdfe76SSatish Balay   b->Mbs = Mbs;
216479bdfe76SSatish Balay   b->Nbs = Nbs;
216579bdfe76SSatish Balay 
2166c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2167c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2168488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2169488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2170c7fcc2eaSBarry Smith 
217179bdfe76SSatish Balay   /* build local table of row and column ownerships */
2172ff2fd236SBarry Smith   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2173ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21740ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2175ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2176ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
217779bdfe76SSatish Balay   b->rowners[0]    = 0;
217879bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
217979bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
218079bdfe76SSatish Balay   }
2181ff2fd236SBarry Smith   for (i=0; i<=b->size; i++) {
2182ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2183ff2fd236SBarry Smith   }
218479bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
218579bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
21864fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
21874fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
21884fa0d573SSatish Balay 
2189ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
219079bdfe76SSatish Balay   b->cowners[0] = 0;
219179bdfe76SSatish Balay   for (i=2; i<=b->size; i++) {
219279bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
219379bdfe76SSatish Balay   }
219479bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
219579bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21964fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21974fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
219879bdfe76SSatish Balay 
2199a07cd24cSSatish Balay 
220079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2201029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
220279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
220379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2204029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
220579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
220679bdfe76SSatish Balay 
220779bdfe76SSatish Balay   /* build cache for off array entries formed */
22088798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
22098798bf22SSatish Balay   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
22107c922b88SBarry Smith   b->donotstash  = PETSC_FALSE;
22117c922b88SBarry Smith   b->colmap      = PETSC_NULL;
22127c922b88SBarry Smith   b->garray      = PETSC_NULL;
22137c922b88SBarry Smith   b->roworiented = PETSC_TRUE;
221479bdfe76SSatish Balay 
2215*6fa18ffdSBarry Smith #if defined(PEYSC_USE_MAT_SINGLE)
2216*6fa18ffdSBarry Smith   /* stuff for MatSetValues_XXX in single precision */
2217*6fa18ffdSBarry Smith   b->lensetvalues     = 0;
2218*6fa18ffdSBarry Smith   b->setvaluescopy    = PETSC_NULL;
2219*6fa18ffdSBarry Smith #endif
2220*6fa18ffdSBarry Smith 
222130793edcSSatish Balay   /* stuff used in block assembly */
222230793edcSSatish Balay   b->barray       = 0;
222330793edcSSatish Balay 
222479bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
222579bdfe76SSatish Balay   b->lvec         = 0;
222679bdfe76SSatish Balay   b->Mvctx        = 0;
222779bdfe76SSatish Balay 
222879bdfe76SSatish Balay   /* stuff for MatGetRow() */
222979bdfe76SSatish Balay   b->rowindices   = 0;
223079bdfe76SSatish Balay   b->rowvalues    = 0;
223179bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
223279bdfe76SSatish Balay 
2233a07cd24cSSatish Balay   /* hash table stuff */
2234a07cd24cSSatish Balay   b->ht           = 0;
2235187ce0cbSSatish Balay   b->hd           = 0;
22360bdbc534SSatish Balay   b->ht_size      = 0;
22377c922b88SBarry Smith   b->ht_flag      = PETSC_FALSE;
223825fdafccSSatish Balay   b->ht_fact      = 0;
2239187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2240187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2241a07cd24cSSatish Balay 
224279bdfe76SSatish Balay   *A = B;
2243133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2244133cdb44SSatish Balay   if (flg) {
2245133cdb44SSatish Balay     double fact = 1.39;
2246133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2247f1af5d2fSBarry Smith     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2248133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2249133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2250133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2251133cdb44SSatish Balay   }
2252f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
22537fc3c18eSBarry Smith                                      "MatStoreValues_MPIBAIJ",
22547fc3c18eSBarry Smith                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2255f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
22567fc3c18eSBarry Smith                                      "MatRetrieveValues_MPIBAIJ",
22577fc3c18eSBarry Smith                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2258f1af5d2fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
22595ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
22605ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
22613a40ed3dSBarry Smith   PetscFunctionReturn(0);
226279bdfe76SSatish Balay }
2263026e39d0SSatish Balay 
22645615d1e5SSatish Balay #undef __FUNC__
2265b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
22662e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
22670ac07820SSatish Balay {
22680ac07820SSatish Balay   Mat         mat;
22690ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2270f1af5d2fSBarry Smith   int         ierr,len=0;
2271f1af5d2fSBarry Smith   PetscTruth  flg;
22720ac07820SSatish Balay 
2273d64ed03dSBarry Smith   PetscFunctionBegin;
22740ac07820SSatish Balay   *newmat       = 0;
22753f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
22760ac07820SSatish Balay   PLogObjectCreate(mat);
22770ac07820SSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2278549d3d68SSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2279e1311b90SBarry Smith   mat->ops->destroy = MatDestroy_MPIBAIJ;
2280e1311b90SBarry Smith   mat->ops->view    = MatView_MPIBAIJ;
22810ac07820SSatish Balay   mat->factor       = matin->factor;
22820ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
2283aef5e8e0SSatish Balay   mat->insertmode   = NOT_SET_VALUES;
22840ac07820SSatish Balay 
22850ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
22860ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
22870ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
22880ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
22890ac07820SSatish Balay 
22900ac07820SSatish Balay   a->bs  = oldmat->bs;
22910ac07820SSatish Balay   a->bs2 = oldmat->bs2;
22920ac07820SSatish Balay   a->mbs = oldmat->mbs;
22930ac07820SSatish Balay   a->nbs = oldmat->nbs;
22940ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
22950ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
22960ac07820SSatish Balay 
22970ac07820SSatish Balay   a->rstart       = oldmat->rstart;
22980ac07820SSatish Balay   a->rend         = oldmat->rend;
22990ac07820SSatish Balay   a->cstart       = oldmat->cstart;
23000ac07820SSatish Balay   a->cend         = oldmat->cend;
23010ac07820SSatish Balay   a->size         = oldmat->size;
23020ac07820SSatish Balay   a->rank         = oldmat->rank;
2303aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2304aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2305aef5e8e0SSatish Balay   a->rowindices   = 0;
23060ac07820SSatish Balay   a->rowvalues    = 0;
23070ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
230830793edcSSatish Balay   a->barray       = 0;
23093123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
23103123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
23113123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
23123123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
23130ac07820SSatish Balay 
2314133cdb44SSatish Balay   /* hash table stuff */
2315133cdb44SSatish Balay   a->ht           = 0;
2316133cdb44SSatish Balay   a->hd           = 0;
2317133cdb44SSatish Balay   a->ht_size      = 0;
2318133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
231925fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2320133cdb44SSatish Balay   a->ht_total_ct  = 0;
2321133cdb44SSatish Balay   a->ht_insert_ct = 0;
2322133cdb44SSatish Balay 
2323133cdb44SSatish Balay 
2324ff2fd236SBarry Smith   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2325ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
23260ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2327ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2328549d3d68SSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
23298798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
23308798bf22SSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
23310ac07820SSatish Balay   if (oldmat->colmap) {
2332aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
23330f5bd95cSBarry Smith   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
233448e59246SSatish Balay #else
23350ac07820SSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
23360ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2337549d3d68SSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
233848e59246SSatish Balay #endif
23390ac07820SSatish Balay   } else a->colmap = 0;
23400ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
23410ac07820SSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
23420ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2343549d3d68SSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
23440ac07820SSatish Balay   } else a->garray = 0;
23450ac07820SSatish Balay 
23460ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
23470ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
23480ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2349e18c124aSSatish Balay 
23500ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
23512e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
23520ac07820SSatish Balay   PLogObjectParent(mat,a->A);
23532e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
23540ac07820SSatish Balay   PLogObjectParent(mat,a->B);
23550ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2356e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
23570ac07820SSatish Balay   if (flg) {
23580ac07820SSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
23590ac07820SSatish Balay   }
23600ac07820SSatish Balay   *newmat = mat;
23613a40ed3dSBarry Smith   PetscFunctionReturn(0);
23620ac07820SSatish Balay }
236357b952d6SSatish Balay 
236457b952d6SSatish Balay #include "sys.h"
236557b952d6SSatish Balay 
23665615d1e5SSatish Balay #undef __FUNC__
2367b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
236857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
236957b952d6SSatish Balay {
237057b952d6SSatish Balay   Mat          A;
237157b952d6SSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
237257b952d6SSatish Balay   Scalar       *vals,*buf;
237357b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
237457b952d6SSatish Balay   MPI_Status   status;
2375cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
237657b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2377f1af5d2fSBarry Smith   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
237857b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
237957b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
238057b952d6SSatish Balay 
2381d64ed03dSBarry Smith   PetscFunctionBegin;
2382f1af5d2fSBarry Smith   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
238357b952d6SSatish Balay 
2384d132466eSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2385d132466eSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
238657b952d6SSatish Balay   if (!rank) {
238757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2388e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2389a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2390d64ed03dSBarry Smith     if (header[3] < 0) {
2391a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2392d64ed03dSBarry Smith     }
23936c5fab8fSBarry Smith   }
2394d64ed03dSBarry Smith 
2395ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
239657b952d6SSatish Balay   M = header[1]; N = header[2];
239757b952d6SSatish Balay 
2398a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
239957b952d6SSatish Balay 
240057b952d6SSatish Balay   /*
240157b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
240257b952d6SSatish Balay      divisible by the blocksize
240357b952d6SSatish Balay   */
240457b952d6SSatish Balay   Mbs        = M/bs;
240557b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
240657b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
240757b952d6SSatish Balay   else                  Mbs++;
240857b952d6SSatish Balay   if (extra_rows &&!rank) {
2409b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
241057b952d6SSatish Balay   }
2411537820f0SBarry Smith 
241257b952d6SSatish Balay   /* determine ownership of all rows */
241357b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
241457b952d6SSatish Balay   m   = mbs * bs;
2415cee3aa6bSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2416cee3aa6bSSatish Balay   browners = rowners + size + 1;
2417ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
241857b952d6SSatish Balay   rowners[0] = 0;
2419cee3aa6bSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2420cee3aa6bSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
242157b952d6SSatish Balay   rstart = rowners[rank];
242257b952d6SSatish Balay   rend   = rowners[rank+1];
242357b952d6SSatish Balay 
242457b952d6SSatish Balay   /* distribute row lengths to all processors */
242557b952d6SSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
242657b952d6SSatish Balay   if (!rank) {
242757b952d6SSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2428e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
242957b952d6SSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
243057b952d6SSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2431cee3aa6bSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2432ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2433606d414cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2434d64ed03dSBarry Smith   } else {
2435ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
243657b952d6SSatish Balay   }
243757b952d6SSatish Balay 
243857b952d6SSatish Balay   if (!rank) {
243957b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
244057b952d6SSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2441549d3d68SSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
244257b952d6SSatish Balay     for (i=0; i<size; i++) {
244357b952d6SSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
244457b952d6SSatish Balay         procsnz[i] += rowlengths[j];
244557b952d6SSatish Balay       }
244657b952d6SSatish Balay     }
2447606d414cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
244857b952d6SSatish Balay 
244957b952d6SSatish Balay     /* determine max buffer needed and allocate it */
245057b952d6SSatish Balay     maxnz = 0;
245157b952d6SSatish Balay     for (i=0; i<size; i++) {
245257b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
245357b952d6SSatish Balay     }
245457b952d6SSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
245557b952d6SSatish Balay 
245657b952d6SSatish Balay     /* read in my part of the matrix column indices  */
245757b952d6SSatish Balay     nz = procsnz[0];
245857b952d6SSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
245957b952d6SSatish Balay     mycols = ibuf;
2460cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2461e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2462cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2463cee3aa6bSSatish Balay 
246457b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
246557b952d6SSatish Balay     for (i=1; i<size-1; i++) {
246657b952d6SSatish Balay       nz   = procsnz[i];
2467e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2468ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
246957b952d6SSatish Balay     }
247057b952d6SSatish Balay     /* read in the stuff for the last proc */
247157b952d6SSatish Balay     if (size != 1) {
247257b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2473e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
247457b952d6SSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2475ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
247657b952d6SSatish Balay     }
2477606d414cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2478d64ed03dSBarry Smith   } else {
247957b952d6SSatish Balay     /* determine buffer space needed for message */
248057b952d6SSatish Balay     nz = 0;
248157b952d6SSatish Balay     for (i=0; i<m; i++) {
248257b952d6SSatish Balay       nz += locrowlens[i];
248357b952d6SSatish Balay     }
248457b952d6SSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
248557b952d6SSatish Balay     mycols = ibuf;
248657b952d6SSatish Balay     /* receive message of column indices*/
2487ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2488ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2489a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
249057b952d6SSatish Balay   }
249157b952d6SSatish Balay 
249257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2493cee3aa6bSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2494cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
249557b952d6SSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2496549d3d68SSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
249757b952d6SSatish Balay   masked1 = mask    + Mbs;
249857b952d6SSatish Balay   masked2 = masked1 + Mbs;
249957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
250057b952d6SSatish Balay   for (i=0; i<mbs; i++) {
250157b952d6SSatish Balay     dcount  = 0;
250257b952d6SSatish Balay     odcount = 0;
250357b952d6SSatish Balay     for (j=0; j<bs; j++) {
250457b952d6SSatish Balay       kmax = locrowlens[rowcount];
250557b952d6SSatish Balay       for (k=0; k<kmax; k++) {
250657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
250757b952d6SSatish Balay         if (!mask[tmp]) {
250857b952d6SSatish Balay           mask[tmp] = 1;
250957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
251057b952d6SSatish Balay           else masked1[dcount++] = tmp;
251157b952d6SSatish Balay         }
251257b952d6SSatish Balay       }
251357b952d6SSatish Balay       rowcount++;
251457b952d6SSatish Balay     }
2515cee3aa6bSSatish Balay 
251657b952d6SSatish Balay     dlens[i]  = dcount;
251757b952d6SSatish Balay     odlens[i] = odcount;
2518cee3aa6bSSatish Balay 
251957b952d6SSatish Balay     /* zero out the mask elements we set */
252057b952d6SSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
252157b952d6SSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
252257b952d6SSatish Balay   }
2523cee3aa6bSSatish Balay 
252457b952d6SSatish Balay   /* create our matrix */
2525549d3d68SSatish Balay   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
252657b952d6SSatish Balay   A = *newmat;
25276d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
252857b952d6SSatish Balay 
252957b952d6SSatish Balay   if (!rank) {
253057b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
253157b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
253257b952d6SSatish Balay     nz = procsnz[0];
253357b952d6SSatish Balay     vals = buf;
2534cee3aa6bSSatish Balay     mycols = ibuf;
2535cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2536e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2537cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2538537820f0SBarry Smith 
253957b952d6SSatish Balay     /* insert into matrix */
254057b952d6SSatish Balay     jj      = rstart*bs;
254157b952d6SSatish Balay     for (i=0; i<m; i++) {
25423eda8832SBarry Smith       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
254357b952d6SSatish Balay       mycols += locrowlens[i];
254457b952d6SSatish Balay       vals   += locrowlens[i];
254557b952d6SSatish Balay       jj++;
254657b952d6SSatish Balay     }
254757b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
254857b952d6SSatish Balay     for (i=1; i<size-1; i++) {
254957b952d6SSatish Balay       nz   = procsnz[i];
255057b952d6SSatish Balay       vals = buf;
2551e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2552ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
255357b952d6SSatish Balay     }
255457b952d6SSatish Balay     /* the last proc */
255557b952d6SSatish Balay     if (size != 1){
255657b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2557cee3aa6bSSatish Balay       vals = buf;
2558e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
255957b952d6SSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2560ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
256157b952d6SSatish Balay     }
2562606d414cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2563d64ed03dSBarry Smith   } else {
256457b952d6SSatish Balay     /* receive numeric values */
256557b952d6SSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
256657b952d6SSatish Balay 
256757b952d6SSatish Balay     /* receive message of values*/
256857b952d6SSatish Balay     vals   = buf;
2569cee3aa6bSSatish Balay     mycols = ibuf;
2570ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2571ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2572a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
257357b952d6SSatish Balay 
257457b952d6SSatish Balay     /* insert into matrix */
257557b952d6SSatish Balay     jj      = rstart*bs;
2576cee3aa6bSSatish Balay     for (i=0; i<m; i++) {
25773eda8832SBarry Smith       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
257857b952d6SSatish Balay       mycols += locrowlens[i];
257957b952d6SSatish Balay       vals   += locrowlens[i];
258057b952d6SSatish Balay       jj++;
258157b952d6SSatish Balay     }
258257b952d6SSatish Balay   }
2583606d414cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2584606d414cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2585606d414cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2586606d414cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2587606d414cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2588606d414cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
25896d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25906d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
25913a40ed3dSBarry Smith   PetscFunctionReturn(0);
259257b952d6SSatish Balay }
259357b952d6SSatish Balay 
2594133cdb44SSatish Balay #undef __FUNC__
2595b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2596133cdb44SSatish Balay /*@
2597133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2598133cdb44SSatish Balay 
2599133cdb44SSatish Balay    Input Parameters:
2600133cdb44SSatish Balay .  mat  - the matrix
2601133cdb44SSatish Balay .  fact - factor
2602133cdb44SSatish Balay 
2603fee21e36SBarry Smith    Collective on Mat
2604fee21e36SBarry Smith 
26058c890885SBarry Smith    Level: advanced
26068c890885SBarry Smith 
2607133cdb44SSatish Balay   Notes:
2608133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2609133cdb44SSatish Balay 
2610133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2611133cdb44SSatish Balay 
2612133cdb44SSatish Balay .seealso: MatSetOption()
2613133cdb44SSatish Balay @*/
2614329f5518SBarry Smith int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2615133cdb44SSatish Balay {
261625fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2617133cdb44SSatish Balay 
2618133cdb44SSatish Balay   PetscFunctionBegin;
2619133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
262025fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2621133cdb44SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2622133cdb44SSatish Balay   }
2623133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2624133cdb44SSatish Balay   baij->ht_fact = fact;
2625133cdb44SSatish Balay   PetscFunctionReturn(0);
2626133cdb44SSatish Balay }
2627