xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 596b8d2e907766f4f3c92fab62aa7579371552eb)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*596b8d2eSBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.75 1997/07/29 14:10:02 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
9589280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
9611ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10211ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
10389280ab3SLois Curfman McInnes  \
10480c1aa95SSatish Balay         /* malloc new storage space */ \
10580c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10680c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10780c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10880c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10980c1aa95SSatish Balay  \
11080c1aa95SSatish Balay         /* copy over old data into new slots */ \
11180c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11280c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11380c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11480c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11580c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11680c1aa95SSatish Balay                                                            len*sizeof(int)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11880c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11980c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
12080c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
12180c1aa95SSatish Balay         /* free up old matrix storage */ \
12280c1aa95SSatish Balay         PetscFree(a->a);  \
12380c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12580c1aa95SSatish Balay         a->singlemalloc = 1; \
12680c1aa95SSatish Balay  \
12780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12980c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
13180c1aa95SSatish Balay         a->reallocs++; \
13280c1aa95SSatish Balay         a->nz++; \
13380c1aa95SSatish Balay       } \
13480c1aa95SSatish Balay       N = nrow++ - 1;  \
13580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13680c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13880c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13980c1aa95SSatish Balay       } \
14080c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
14180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
143ac7a638eSSatish Balay       a_noinsert:; \
14480c1aa95SSatish Balay     ailen[brow] = nrow; \
14580c1aa95SSatish Balay }
14657b952d6SSatish Balay 
147ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148ac7a638eSSatish Balay { \
149ac7a638eSSatish Balay  \
150ac7a638eSSatish Balay     brow = row/bs;  \
151ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
153ac7a638eSSatish Balay       bcol = col/bs; \
154ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
155ac7a638eSSatish Balay       low = 0; high = nrow; \
156ac7a638eSSatish Balay       while (high-low > 3) { \
157ac7a638eSSatish Balay         t = (low+high)/2; \
158ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
159ac7a638eSSatish Balay         else              low  = t; \
160ac7a638eSSatish Balay       } \
161ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
162ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
163ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
164ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
166ac7a638eSSatish Balay           else                    *bap  = value;  \
167ac7a638eSSatish Balay           goto b_noinsert; \
168ac7a638eSSatish Balay         } \
169ac7a638eSSatish Balay       } \
17089280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
17111ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
172ac7a638eSSatish Balay       if (nrow >= rmax) { \
173ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17474c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
175ac7a638eSSatish Balay         Scalar *new_a; \
176ac7a638eSSatish Balay  \
17711ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
17889280ab3SLois Curfman McInnes  \
179ac7a638eSSatish Balay         /* malloc new storage space */ \
18074c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
181ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
182ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
183ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
184ac7a638eSSatish Balay  \
185ac7a638eSSatish Balay         /* copy over old data into new slots */ \
186ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18774c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
188ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
189ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
190ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
191ac7a638eSSatish Balay                                                            len*sizeof(int)); \
192ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
193ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
194ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
195ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
196ac7a638eSSatish Balay         /* free up old matrix storage */ \
19774c639caSSatish Balay         PetscFree(b->a);  \
19874c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
20074c639caSSatish Balay         b->singlemalloc = 1; \
201ac7a638eSSatish Balay  \
202ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
20474c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
20574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20674c639caSSatish Balay         b->reallocs++; \
20774c639caSSatish Balay         b->nz++; \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       N = nrow++ - 1;  \
210ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
211ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
212ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
213ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
214ac7a638eSSatish Balay       } \
215ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
216ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
217ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
218ac7a638eSSatish Balay       b_noinsert:; \
219ac7a638eSSatish Balay     bilen[brow] = nrow; \
220ac7a638eSSatish Balay }
221ac7a638eSSatish Balay 
2225615d1e5SSatish Balay #undef __FUNC__
2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
22557b952d6SSatish Balay {
22657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22757b952d6SSatish Balay   Scalar      value;
2284fa0d573SSatish Balay   int         ierr,i,j,row,col;
2294fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2304fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2314fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
23257b952d6SSatish Balay 
233eada6651SSatish Balay   /* Some Variables required in the macro */
23480c1aa95SSatish Balay   Mat         A = baij->A;
23580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
236ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
237ac7a638eSSatish Balay   Scalar      *aa=a->a;
238ac7a638eSSatish Balay 
239ac7a638eSSatish Balay   Mat         B = baij->B;
240ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
241ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
242ac7a638eSSatish Balay   Scalar      *ba=b->a;
243ac7a638eSSatish Balay 
244ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
245ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
246ac7a638eSSatish Balay   Scalar      *ap,*bap;
24780c1aa95SSatish Balay 
24857b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
250e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
251e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
252639f9d9dSBarry Smith #endif
25357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
25457b952d6SSatish Balay       row = im[i] - rstart_orig;
25557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25757b952d6SSatish Balay           col = in[j] - cstart_orig;
25857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
26080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26157b952d6SSatish Balay         }
262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
263e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay         else {
26757b952d6SSatish Balay           if (mat->was_assembled) {
268905e6a2fSBarry Smith             if (!baij->colmap) {
269905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270905e6a2fSBarry Smith             }
271905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
27257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
27357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
27457b952d6SSatish Balay               col =  in[j];
2759bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2769bf004c3SSatish Balay               B = baij->B;
2779bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2789bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2799bf004c3SSatish Balay               ba=b->a;
28057b952d6SSatish Balay             }
28157b952d6SSatish Balay           }
28257b952d6SSatish Balay           else col = in[j];
28357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
284ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
285ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
28657b952d6SSatish Balay         }
28757b952d6SSatish Balay       }
28857b952d6SSatish Balay     }
28957b952d6SSatish Balay     else {
29090f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
29157b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
29257b952d6SSatish Balay       }
29357b952d6SSatish Balay       else {
29490f02eecSBarry Smith         if (!baij->donotstash) {
29557b952d6SSatish Balay           row = im[i];
29657b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
29757b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
29857b952d6SSatish Balay           }
29957b952d6SSatish Balay         }
30057b952d6SSatish Balay       }
30157b952d6SSatish Balay     }
30290f02eecSBarry Smith   }
30357b952d6SSatish Balay   return 0;
30457b952d6SSatish Balay }
30557b952d6SSatish Balay 
306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
307ab26458aSBarry Smith #undef __FUNC__
308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
310ab26458aSBarry Smith {
311ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
313abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
314ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
315ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
316ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
317ab26458aSBarry Smith 
31830793edcSSatish Balay 
31930793edcSSatish Balay   if(!barray) {
32047513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
32130793edcSSatish Balay   }
32230793edcSSatish Balay 
323ab26458aSBarry Smith   if (roworiented) {
324ab26458aSBarry Smith     stepval = (n-1)*bs;
325ab26458aSBarry Smith   } else {
326ab26458aSBarry Smith     stepval = (m-1)*bs;
327ab26458aSBarry Smith   }
328ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
329ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
330ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
331ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
332ab26458aSBarry Smith #endif
333ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
334ab26458aSBarry Smith       row = im[i] - rstart;
335ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
33615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
33715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
33815b57d14SSatish Balay           barray = v + i*bs2;
33915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
34015b57d14SSatish Balay           barray = v + j*bs2;
34115b57d14SSatish Balay         } else { /* Here a copy is required */
342ab26458aSBarry Smith           if (roworiented) {
343ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
344ab26458aSBarry Smith           } else {
345ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
346abef11f7SSatish Balay           }
34747513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
34847513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
34930793edcSSatish Balay               *barray++  = *value++;
35047513183SBarry Smith             }
35147513183SBarry Smith           }
35230793edcSSatish Balay           barray -=bs2;
35315b57d14SSatish Balay         }
354abef11f7SSatish Balay 
355abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
356abef11f7SSatish Balay           col  = in[j] - cstart;
35730793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
358ab26458aSBarry Smith         }
359ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
360ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
36147513183SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");}
362ab26458aSBarry Smith #endif
363ab26458aSBarry Smith         else {
364ab26458aSBarry Smith           if (mat->was_assembled) {
365ab26458aSBarry Smith             if (!baij->colmap) {
366ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
367ab26458aSBarry Smith             }
368a5eb4965SSatish Balay 
369a5eb4965SSatish Balay #if defined(PETSC_BOPT_g)
370a5eb4965SSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");}
371a5eb4965SSatish Balay #endif
372a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
373ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
374ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
375ab26458aSBarry Smith               col =  in[j];
376ab26458aSBarry Smith             }
377ab26458aSBarry Smith           }
378ab26458aSBarry Smith           else col = in[j];
37930793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
380ab26458aSBarry Smith         }
381ab26458aSBarry Smith       }
382ab26458aSBarry Smith     }
383ab26458aSBarry Smith     else {
384ab26458aSBarry Smith       if (!baij->donotstash) {
385ab26458aSBarry Smith         if (roworiented ) {
386abef11f7SSatish Balay           row   = im[i]*bs;
387abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
388abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
389abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
390abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
391abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
392abef11f7SSatish Balay               }
393ab26458aSBarry Smith             }
394ab26458aSBarry Smith           }
395ab26458aSBarry Smith         }
396ab26458aSBarry Smith         else {
397ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
398abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
399abef11f7SSatish Balay             col   = in[j]*bs;
400abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
401abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
402abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
403ab26458aSBarry Smith               }
404ab26458aSBarry Smith             }
405ab26458aSBarry Smith           }
406abef11f7SSatish Balay         }
407abef11f7SSatish Balay       }
408ab26458aSBarry Smith     }
409ab26458aSBarry Smith   }
410ab26458aSBarry Smith   return 0;
411ab26458aSBarry Smith }
412ab26458aSBarry Smith 
4135615d1e5SSatish Balay #undef __FUNC__
4145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
415ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
416d6de1c52SSatish Balay {
417d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
418d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
419d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
420d6de1c52SSatish Balay 
421d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
422e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
423e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
424d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
425d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
426d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
427e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
428e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
429d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
430d6de1c52SSatish Balay           col = idxn[j] - bscstart;
431d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
432d6de1c52SSatish Balay         }
433d6de1c52SSatish Balay         else {
434905e6a2fSBarry Smith           if (!baij->colmap) {
435905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
436905e6a2fSBarry Smith           }
437e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
438dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
439d9d09a02SSatish Balay           else {
440dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
441d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
442d6de1c52SSatish Balay           }
443d6de1c52SSatish Balay         }
444d6de1c52SSatish Balay       }
445d9d09a02SSatish Balay     }
446d6de1c52SSatish Balay     else {
447e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
448d6de1c52SSatish Balay     }
449d6de1c52SSatish Balay   }
450d6de1c52SSatish Balay   return 0;
451d6de1c52SSatish Balay }
452d6de1c52SSatish Balay 
4535615d1e5SSatish Balay #undef __FUNC__
4545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
455ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
456d6de1c52SSatish Balay {
457d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
458d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
459acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
460d6de1c52SSatish Balay   double     sum = 0.0;
461d6de1c52SSatish Balay   Scalar     *v;
462d6de1c52SSatish Balay 
463d6de1c52SSatish Balay   if (baij->size == 1) {
464d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
465d6de1c52SSatish Balay   } else {
466d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
467d6de1c52SSatish Balay       v = amat->a;
468d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
469d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
470d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
471d6de1c52SSatish Balay #else
472d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
473d6de1c52SSatish Balay #endif
474d6de1c52SSatish Balay       }
475d6de1c52SSatish Balay       v = bmat->a;
476d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
477d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
478d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
479d6de1c52SSatish Balay #else
480d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
481d6de1c52SSatish Balay #endif
482d6de1c52SSatish Balay       }
483d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
484d6de1c52SSatish Balay       *norm = sqrt(*norm);
485d6de1c52SSatish Balay     }
486acdf5bf4SSatish Balay     else
487e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
488d6de1c52SSatish Balay   }
489d6de1c52SSatish Balay   return 0;
490d6de1c52SSatish Balay }
49157b952d6SSatish Balay 
4925615d1e5SSatish Balay #undef __FUNC__
4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
494ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
49557b952d6SSatish Balay {
49657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
49757b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
49857b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
49957b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
50057b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
50157b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
50257b952d6SSatish Balay   InsertMode  addv;
50357b952d6SSatish Balay   Scalar      *rvalues,*svalues;
50457b952d6SSatish Balay 
50557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
506e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
50757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
508e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
50957b952d6SSatish Balay   }
510e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
51157b952d6SSatish Balay 
51257b952d6SSatish Balay   /*  first count number of contributors to each processor */
51357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
51457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
51557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
51657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
51757b952d6SSatish Balay     idx = baij->stash.idx[i];
51857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
51957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
52057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
52157b952d6SSatish Balay       }
52257b952d6SSatish Balay     }
52357b952d6SSatish Balay   }
52457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
52557b952d6SSatish Balay 
52657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
52757b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
52857b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
52957b952d6SSatish Balay   nreceives = work[rank];
53057b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
53157b952d6SSatish Balay   nmax = work[rank];
53257b952d6SSatish Balay   PetscFree(work);
53357b952d6SSatish Balay 
53457b952d6SSatish Balay   /* post receives:
53557b952d6SSatish Balay        1) each message will consist of ordered pairs
53657b952d6SSatish Balay      (global index,value) we store the global index as a double
53757b952d6SSatish Balay      to simplify the message passing.
53857b952d6SSatish Balay        2) since we don't know how long each individual message is we
53957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
54057b952d6SSatish Balay      this is a lot of wasted space.
54157b952d6SSatish Balay 
54257b952d6SSatish Balay 
54357b952d6SSatish Balay        This could be done better.
54457b952d6SSatish Balay   */
54557b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
54657b952d6SSatish Balay   CHKPTRQ(rvalues);
54757b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
54857b952d6SSatish Balay   CHKPTRQ(recv_waits);
54957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
55057b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
55157b952d6SSatish Balay               comm,recv_waits+i);
55257b952d6SSatish Balay   }
55357b952d6SSatish Balay 
55457b952d6SSatish Balay   /* do sends:
55557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
55657b952d6SSatish Balay          the ith processor
55757b952d6SSatish Balay   */
55857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
55957b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
56057b952d6SSatish Balay   CHKPTRQ(send_waits);
56157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
56257b952d6SSatish Balay   starts[0] = 0;
56357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
56557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
56657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
56757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
56857b952d6SSatish Balay   }
56957b952d6SSatish Balay   PetscFree(owner);
57057b952d6SSatish Balay   starts[0] = 0;
57157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
57257b952d6SSatish Balay   count = 0;
57357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
57457b952d6SSatish Balay     if (procs[i]) {
57557b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
57657b952d6SSatish Balay                 comm,send_waits+count++);
57757b952d6SSatish Balay     }
57857b952d6SSatish Balay   }
57957b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
58057b952d6SSatish Balay 
58157b952d6SSatish Balay   /* Free cache space */
582d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
58357b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
58457b952d6SSatish Balay 
58557b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
58657b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
58757b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
58857b952d6SSatish Balay   baij->rmax       = nmax;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   return 0;
59157b952d6SSatish Balay }
592*596b8d2eSBarry Smith #include <math.h>
593*596b8d2eSBarry Smith #define HASH_KEY 0.6180339887
594*596b8d2eSBarry Smith #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))+1)
59557b952d6SSatish Balay 
596*596b8d2eSBarry Smith int CreateHashTable(Mat mat)
597*596b8d2eSBarry Smith {
598*596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
599*596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
600*596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
601*596b8d2eSBarry Smith   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
602*596b8d2eSBarry Smith   int         size=1.5*nz,ct=0,max=0;
603*596b8d2eSBarry Smith   Scalar      *aa=a->a,*ba=b->a;
604*596b8d2eSBarry Smith   double      key;
605*596b8d2eSBarry Smith   static double *HT;
606*596b8d2eSBarry Smith   static      int flag=1;
607*596b8d2eSBarry Smith 
608*596b8d2eSBarry Smith 
609*596b8d2eSBarry Smith   /* Allocate Memory for Hash Table */
610*596b8d2eSBarry Smith   if (flag) {
611*596b8d2eSBarry Smith     HT = (double*)PetscMalloc(size*sizeof(double));
612*596b8d2eSBarry Smith     flag = 0;
613*596b8d2eSBarry Smith   }
614*596b8d2eSBarry Smith   PetscMemzero(HT,size*sizeof(double));
615*596b8d2eSBarry Smith 
616*596b8d2eSBarry Smith   /* Loop Over A */
617*596b8d2eSBarry Smith   for ( i=0; i<a->n; i++ ) {
618*596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
619*596b8d2eSBarry Smith       key = i*baij->n+aj[j]+1;
620*596b8d2eSBarry Smith       h1  = HASH1(size,key);
621*596b8d2eSBarry Smith 
622*596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
623*596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
624*596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
625*596b8d2eSBarry Smith           break;
626*596b8d2eSBarry Smith         } else ct++;
627*596b8d2eSBarry Smith       }
628*596b8d2eSBarry Smith       if (k> max) max =k;
629*596b8d2eSBarry Smith     }
630*596b8d2eSBarry Smith   }
631*596b8d2eSBarry Smith    printf("***max1 = %d\n",max);
632*596b8d2eSBarry Smith   /* Loop Over B */
633*596b8d2eSBarry Smith   for ( i=0; i<b->n; i++ ) {
634*596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
635*596b8d2eSBarry Smith       key = i*b->n+bj[j]+1;
636*596b8d2eSBarry Smith       h1  = HASH1(size,key);
637*596b8d2eSBarry Smith       for ( k=1; k<size; k++ ){
638*596b8d2eSBarry Smith         if (HT[(h1*k)%size] == 0.0) {
639*596b8d2eSBarry Smith           HT[(h1*k)%size] = key;
640*596b8d2eSBarry Smith           break;
641*596b8d2eSBarry Smith         } else ct++;
642*596b8d2eSBarry Smith       }
643*596b8d2eSBarry Smith       if (k> max) max =k;
644*596b8d2eSBarry Smith     }
645*596b8d2eSBarry Smith   }
646*596b8d2eSBarry Smith 
647*596b8d2eSBarry Smith   printf("***max2 = %d\n",max);
648*596b8d2eSBarry Smith   /* Print Summary */
649*596b8d2eSBarry Smith   for ( i=0,key=0.0,j=0; i<size; i++)
650*596b8d2eSBarry Smith     if (HT[i]) {j++;}
651*596b8d2eSBarry Smith 
652*596b8d2eSBarry Smith   printf("Size %d Average Buckets %d no of Keys %d\n",size,ct,j);
653*596b8d2eSBarry Smith   return 0;
654*596b8d2eSBarry Smith }
65557b952d6SSatish Balay 
6565615d1e5SSatish Balay #undef __FUNC__
6575615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
658ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
65957b952d6SSatish Balay {
66057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
66157b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
66257b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
663*596b8d2eSBarry Smith   int         bs=baij->bs,row,col,other_disassembled,flg;
66457b952d6SSatish Balay   Scalar      *values,val;
665e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
66657b952d6SSatish Balay 
66757b952d6SSatish Balay   /*  wait on receives */
66857b952d6SSatish Balay   while (count) {
66957b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
67057b952d6SSatish Balay     /* unpack receives into our local space */
67157b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
67257b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
67357b952d6SSatish Balay     n = n/3;
67457b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
67557b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
67657b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
67757b952d6SSatish Balay       val = values[3*i+2];
67857b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
67957b952d6SSatish Balay         col -= baij->cstart*bs;
6806fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
68157b952d6SSatish Balay       }
68257b952d6SSatish Balay       else {
68357b952d6SSatish Balay         if (mat->was_assembled) {
684905e6a2fSBarry Smith           if (!baij->colmap) {
685905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
686905e6a2fSBarry Smith           }
687a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
68857b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
68957b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
69057b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
69157b952d6SSatish Balay           }
69257b952d6SSatish Balay         }
6936fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
69457b952d6SSatish Balay       }
69557b952d6SSatish Balay     }
69657b952d6SSatish Balay     count--;
69757b952d6SSatish Balay   }
69857b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
69957b952d6SSatish Balay 
70057b952d6SSatish Balay   /* wait on sends */
70157b952d6SSatish Balay   if (baij->nsends) {
70257b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
70357b952d6SSatish Balay     CHKPTRQ(send_status);
70457b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
70557b952d6SSatish Balay     PetscFree(send_status);
70657b952d6SSatish Balay   }
70757b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
70857b952d6SSatish Balay 
70957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
71057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
71157b952d6SSatish Balay 
71257b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
71357b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
71457b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
71557b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
71657b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
71757b952d6SSatish Balay   }
71857b952d6SSatish Balay 
7196d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
72057b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
72157b952d6SSatish Balay   }
72257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
72357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
72457b952d6SSatish Balay 
725*596b8d2eSBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-use_hash",&flg); CHKERRQ(ierr);
726*596b8d2eSBarry Smith   if (flg) CreateHashTable(mat);
72757b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
72857b952d6SSatish Balay   return 0;
72957b952d6SSatish Balay }
73057b952d6SSatish Balay 
7315615d1e5SSatish Balay #undef __FUNC__
7325615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
73357b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
73457b952d6SSatish Balay {
73557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
73657b952d6SSatish Balay   int          ierr;
73757b952d6SSatish Balay 
73857b952d6SSatish Balay   if (baij->size == 1) {
73957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
74057b952d6SSatish Balay   }
741e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
74257b952d6SSatish Balay   return 0;
74357b952d6SSatish Balay }
74457b952d6SSatish Balay 
7455615d1e5SSatish Balay #undef __FUNC__
7465615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
74757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
74857b952d6SSatish Balay {
74957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
750cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
75157b952d6SSatish Balay   FILE         *fd;
75257b952d6SSatish Balay   ViewerType   vtype;
75357b952d6SSatish Balay 
75457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
75557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
75657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
757639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
7584e220ebcSLois Curfman McInnes       MatInfo info;
75957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
76057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
7614e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
76257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
76357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7644e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7654e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7664e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7674e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7684e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7694e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
77057b952d6SSatish Balay       fflush(fd);
77157b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
77257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
77357b952d6SSatish Balay       return 0;
77457b952d6SSatish Balay     }
775639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
776bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
77757b952d6SSatish Balay       return 0;
77857b952d6SSatish Balay     }
77957b952d6SSatish Balay   }
78057b952d6SSatish Balay 
78157b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
78257b952d6SSatish Balay     Draw       draw;
78357b952d6SSatish Balay     PetscTruth isnull;
78457b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
78557b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
78657b952d6SSatish Balay   }
78757b952d6SSatish Balay 
78857b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
78957b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
79057b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
79157b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
79257b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
79357b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
79457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
79557b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
79657b952d6SSatish Balay     fflush(fd);
79757b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
79857b952d6SSatish Balay   }
79957b952d6SSatish Balay   else {
80057b952d6SSatish Balay     int size = baij->size;
80157b952d6SSatish Balay     rank = baij->rank;
80257b952d6SSatish Balay     if (size == 1) {
80357b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
80457b952d6SSatish Balay     }
80557b952d6SSatish Balay     else {
80657b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
80757b952d6SSatish Balay       Mat         A;
80857b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
80957b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
81057b952d6SSatish Balay       int         mbs=baij->mbs;
81157b952d6SSatish Balay       Scalar      *a;
81257b952d6SSatish Balay 
81357b952d6SSatish Balay       if (!rank) {
814cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
81557b952d6SSatish Balay         CHKERRQ(ierr);
81657b952d6SSatish Balay       }
81757b952d6SSatish Balay       else {
818cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
81957b952d6SSatish Balay         CHKERRQ(ierr);
82057b952d6SSatish Balay       }
82157b952d6SSatish Balay       PLogObjectParent(mat,A);
82257b952d6SSatish Balay 
82357b952d6SSatish Balay       /* copy over the A part */
82457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
82557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
82657b952d6SSatish Balay       row = baij->rstart;
82757b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
82857b952d6SSatish Balay 
82957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
83057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
83157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
83257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
83357b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
83457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
835cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
836cee3aa6bSSatish Balay             col++; a += bs;
83757b952d6SSatish Balay           }
83857b952d6SSatish Balay         }
83957b952d6SSatish Balay       }
84057b952d6SSatish Balay       /* copy over the B part */
84157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
84257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
84357b952d6SSatish Balay       row = baij->rstart*bs;
84457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
84557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
84657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
84757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
84857b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
84957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
850cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
851cee3aa6bSSatish Balay             col++; a += bs;
85257b952d6SSatish Balay           }
85357b952d6SSatish Balay         }
85457b952d6SSatish Balay       }
85557b952d6SSatish Balay       PetscFree(rvals);
8566d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
8576d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
85857b952d6SSatish Balay       if (!rank) {
85957b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
86057b952d6SSatish Balay       }
86157b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
86257b952d6SSatish Balay     }
86357b952d6SSatish Balay   }
86457b952d6SSatish Balay   return 0;
86557b952d6SSatish Balay }
86657b952d6SSatish Balay 
86757b952d6SSatish Balay 
86857b952d6SSatish Balay 
8695615d1e5SSatish Balay #undef __FUNC__
8705615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
871ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
87257b952d6SSatish Balay {
87357b952d6SSatish Balay   Mat         mat = (Mat) obj;
87457b952d6SSatish Balay   int         ierr;
87557b952d6SSatish Balay   ViewerType  vtype;
87657b952d6SSatish Balay 
87757b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
87857b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
87957b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
88057b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
88157b952d6SSatish Balay   }
88257b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
88357b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
88457b952d6SSatish Balay   }
88557b952d6SSatish Balay   return 0;
88657b952d6SSatish Balay }
88757b952d6SSatish Balay 
8885615d1e5SSatish Balay #undef __FUNC__
8895615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
890ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
89179bdfe76SSatish Balay {
89279bdfe76SSatish Balay   Mat         mat = (Mat) obj;
89379bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
89479bdfe76SSatish Balay   int         ierr;
89579bdfe76SSatish Balay 
89679bdfe76SSatish Balay #if defined(PETSC_LOG)
89779bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
89879bdfe76SSatish Balay #endif
89979bdfe76SSatish Balay 
90083e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
90179bdfe76SSatish Balay   PetscFree(baij->rowners);
90279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
90379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
90479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
90579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
90679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
90779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
90879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
90930793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
91079bdfe76SSatish Balay   PetscFree(baij);
91190f02eecSBarry Smith   if (mat->mapping) {
91290f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
91390f02eecSBarry Smith   }
91479bdfe76SSatish Balay   PLogObjectDestroy(mat);
91579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
91679bdfe76SSatish Balay   return 0;
91779bdfe76SSatish Balay }
91879bdfe76SSatish Balay 
9195615d1e5SSatish Balay #undef __FUNC__
9205615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
921ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
922cee3aa6bSSatish Balay {
923cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
92447b4a8eaSLois Curfman McInnes   int         ierr, nt;
925cee3aa6bSSatish Balay 
926c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
92747b4a8eaSLois Curfman McInnes   if (nt != a->n) {
928ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
92947b4a8eaSLois Curfman McInnes   }
930c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
93147b4a8eaSLois Curfman McInnes   if (nt != a->m) {
932e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
93347b4a8eaSLois Curfman McInnes   }
93443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
935cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
93643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
937cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
93843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
939cee3aa6bSSatish Balay   return 0;
940cee3aa6bSSatish Balay }
941cee3aa6bSSatish Balay 
9425615d1e5SSatish Balay #undef __FUNC__
9435615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
944ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
945cee3aa6bSSatish Balay {
946cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
947cee3aa6bSSatish Balay   int        ierr;
94843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
949cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
95043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
951cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
952cee3aa6bSSatish Balay   return 0;
953cee3aa6bSSatish Balay }
954cee3aa6bSSatish Balay 
9555615d1e5SSatish Balay #undef __FUNC__
9565615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
957ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
958cee3aa6bSSatish Balay {
959cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
960cee3aa6bSSatish Balay   int        ierr;
961cee3aa6bSSatish Balay 
962cee3aa6bSSatish Balay   /* do nondiagonal part */
963cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
964cee3aa6bSSatish Balay   /* send it on its way */
965537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
966cee3aa6bSSatish Balay   /* do local part */
967cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
968cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
969cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
970cee3aa6bSSatish Balay   /* but is not perhaps always true. */
971639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
972cee3aa6bSSatish Balay   return 0;
973cee3aa6bSSatish Balay }
974cee3aa6bSSatish Balay 
9755615d1e5SSatish Balay #undef __FUNC__
9765615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
977ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
978cee3aa6bSSatish Balay {
979cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
980cee3aa6bSSatish Balay   int        ierr;
981cee3aa6bSSatish Balay 
982cee3aa6bSSatish Balay   /* do nondiagonal part */
983cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
984cee3aa6bSSatish Balay   /* send it on its way */
985537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
986cee3aa6bSSatish Balay   /* do local part */
987cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
988cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
989cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
990cee3aa6bSSatish Balay   /* but is not perhaps always true. */
991537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
992cee3aa6bSSatish Balay   return 0;
993cee3aa6bSSatish Balay }
994cee3aa6bSSatish Balay 
995cee3aa6bSSatish Balay /*
996cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
997cee3aa6bSSatish Balay    diagonal block
998cee3aa6bSSatish Balay */
9995615d1e5SSatish Balay #undef __FUNC__
10005615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1001ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1002cee3aa6bSSatish Balay {
1003cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1004cee3aa6bSSatish Balay   if (a->M != a->N)
1005e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
1006cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
1007cee3aa6bSSatish Balay }
1008cee3aa6bSSatish Balay 
10095615d1e5SSatish Balay #undef __FUNC__
10105615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1011ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1012cee3aa6bSSatish Balay {
1013cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1014cee3aa6bSSatish Balay   int        ierr;
1015cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1016cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1017cee3aa6bSSatish Balay   return 0;
1018cee3aa6bSSatish Balay }
1019026e39d0SSatish Balay 
10205615d1e5SSatish Balay #undef __FUNC__
10215615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1022ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
102357b952d6SSatish Balay {
102457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
102557b952d6SSatish Balay   *m = mat->M; *n = mat->N;
102657b952d6SSatish Balay   return 0;
102757b952d6SSatish Balay }
102857b952d6SSatish Balay 
10295615d1e5SSatish Balay #undef __FUNC__
10305615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1031ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
103257b952d6SSatish Balay {
103357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
103457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
103557b952d6SSatish Balay   return 0;
103657b952d6SSatish Balay }
103757b952d6SSatish Balay 
10385615d1e5SSatish Balay #undef __FUNC__
10395615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1040ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
104157b952d6SSatish Balay {
104257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
104357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
104457b952d6SSatish Balay   return 0;
104557b952d6SSatish Balay }
104657b952d6SSatish Balay 
1047acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1048acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1049acdf5bf4SSatish Balay 
10505615d1e5SSatish Balay #undef __FUNC__
10515615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1052acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1053acdf5bf4SSatish Balay {
1054acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1055acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1056acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1057d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1058d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1059acdf5bf4SSatish Balay 
1060e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
1061acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1062acdf5bf4SSatish Balay 
1063acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1064acdf5bf4SSatish Balay     /*
1065acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1066acdf5bf4SSatish Balay     */
1067acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1068bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1069bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1070acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1071acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1072acdf5bf4SSatish Balay     }
1073acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1074acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1075acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1076acdf5bf4SSatish Balay   }
1077acdf5bf4SSatish Balay 
1078acdf5bf4SSatish Balay 
1079e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1080d9d09a02SSatish Balay   lrow = row - brstart;
1081acdf5bf4SSatish Balay 
1082acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1083acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1084acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1085acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1086acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1087acdf5bf4SSatish Balay   nztot = nzA + nzB;
1088acdf5bf4SSatish Balay 
1089acdf5bf4SSatish Balay   cmap  = mat->garray;
1090acdf5bf4SSatish Balay   if (v  || idx) {
1091acdf5bf4SSatish Balay     if (nztot) {
1092acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1093acdf5bf4SSatish Balay       int imark = -1;
1094acdf5bf4SSatish Balay       if (v) {
1095acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1096acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1097d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1098acdf5bf4SSatish Balay           else break;
1099acdf5bf4SSatish Balay         }
1100acdf5bf4SSatish Balay         imark = i;
1101acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1102acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1103acdf5bf4SSatish Balay       }
1104acdf5bf4SSatish Balay       if (idx) {
1105acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1106acdf5bf4SSatish Balay         if (imark > -1) {
1107acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1108bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1109acdf5bf4SSatish Balay           }
1110acdf5bf4SSatish Balay         } else {
1111acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1112d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1113d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1114acdf5bf4SSatish Balay             else break;
1115acdf5bf4SSatish Balay           }
1116acdf5bf4SSatish Balay           imark = i;
1117acdf5bf4SSatish Balay         }
1118d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1119d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1120acdf5bf4SSatish Balay       }
1121acdf5bf4SSatish Balay     }
1122d212a18eSSatish Balay     else {
1123d212a18eSSatish Balay       if (idx) *idx = 0;
1124d212a18eSSatish Balay       if (v)   *v   = 0;
1125d212a18eSSatish Balay     }
1126acdf5bf4SSatish Balay   }
1127acdf5bf4SSatish Balay   *nz = nztot;
1128acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1129acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1130acdf5bf4SSatish Balay   return 0;
1131acdf5bf4SSatish Balay }
1132acdf5bf4SSatish Balay 
11335615d1e5SSatish Balay #undef __FUNC__
11345615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1135acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1136acdf5bf4SSatish Balay {
1137acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1138acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1139e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1140acdf5bf4SSatish Balay   }
1141acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1142acdf5bf4SSatish Balay   return 0;
1143acdf5bf4SSatish Balay }
1144acdf5bf4SSatish Balay 
11455615d1e5SSatish Balay #undef __FUNC__
11465615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1147ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
11485a838052SSatish Balay {
11495a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
11505a838052SSatish Balay   *bs = baij->bs;
11515a838052SSatish Balay   return 0;
11525a838052SSatish Balay }
11535a838052SSatish Balay 
11545615d1e5SSatish Balay #undef __FUNC__
11555615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1156ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
115758667388SSatish Balay {
115858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
115958667388SSatish Balay   int         ierr;
116058667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
116158667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
116258667388SSatish Balay   return 0;
116358667388SSatish Balay }
11640ac07820SSatish Balay 
11655615d1e5SSatish Balay #undef __FUNC__
11665615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1167ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11680ac07820SSatish Balay {
11694e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11704e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11717d57db60SLois Curfman McInnes   int         ierr;
11727d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11730ac07820SSatish Balay 
11744e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11754e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11764e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11774e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11784e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11794e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11804e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11814e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11824e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11830ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11844e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11854e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11864e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11874e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11884e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11890ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1190dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11914e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11924e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11934e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11944e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11954e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11960ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1197dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11984e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11994e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
12004e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
12014e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
12024e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
12030ac07820SSatish Balay   }
12044e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
12054e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
12064e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
12070ac07820SSatish Balay   return 0;
12080ac07820SSatish Balay }
12090ac07820SSatish Balay 
12105615d1e5SSatish Balay #undef __FUNC__
12115615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1212ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
121358667388SSatish Balay {
121458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
121558667388SSatish Balay 
121658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
121758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
12186da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1219c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
122096854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1221c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1222b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1223b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1224b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1225aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
122658667388SSatish Balay         MatSetOption(a->A,op);
122758667388SSatish Balay         MatSetOption(a->B,op);
1228b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
12296da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
123058667388SSatish Balay              op == MAT_SYMMETRIC ||
123158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
123258667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
123358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
123458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
123558667388SSatish Balay     a->roworiented = 0;
123658667388SSatish Balay     MatSetOption(a->A,op);
123758667388SSatish Balay     MatSetOption(a->B,op);
12382b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
123990f02eecSBarry Smith     a->donotstash = 1;
124090f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1241e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
124258667388SSatish Balay   else
1243e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
124458667388SSatish Balay   return 0;
124558667388SSatish Balay }
124658667388SSatish Balay 
12475615d1e5SSatish Balay #undef __FUNC__
12485615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1249ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
12500ac07820SSatish Balay {
12510ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
12520ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
12530ac07820SSatish Balay   Mat        B;
12540ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
12550ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
12560ac07820SSatish Balay   Scalar     *a;
12570ac07820SSatish Balay 
12580ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1259e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
12600ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
12610ac07820SSatish Balay   CHKERRQ(ierr);
12620ac07820SSatish Balay 
12630ac07820SSatish Balay   /* copy over the A part */
12640ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12650ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12660ac07820SSatish Balay   row = baij->rstart;
12670ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12680ac07820SSatish Balay 
12690ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12700ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12710ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12720ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12730ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12740ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12750ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12760ac07820SSatish Balay         col++; a += bs;
12770ac07820SSatish Balay       }
12780ac07820SSatish Balay     }
12790ac07820SSatish Balay   }
12800ac07820SSatish Balay   /* copy over the B part */
12810ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12820ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12830ac07820SSatish Balay   row = baij->rstart*bs;
12840ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12850ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12860ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12870ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12880ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12890ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12900ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12910ac07820SSatish Balay         col++; a += bs;
12920ac07820SSatish Balay       }
12930ac07820SSatish Balay     }
12940ac07820SSatish Balay   }
12950ac07820SSatish Balay   PetscFree(rvals);
12960ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12970ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12980ac07820SSatish Balay 
12990ac07820SSatish Balay   if (matout != PETSC_NULL) {
13000ac07820SSatish Balay     *matout = B;
13010ac07820SSatish Balay   } else {
13020ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
13030ac07820SSatish Balay     PetscFree(baij->rowners);
13040ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
13050ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
13060ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
13070ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
13080ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
13090ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
13100ac07820SSatish Balay     PetscFree(baij);
1311f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
13120ac07820SSatish Balay     PetscHeaderDestroy(B);
13130ac07820SSatish Balay   }
13140ac07820SSatish Balay   return 0;
13150ac07820SSatish Balay }
13160e95ebc0SSatish Balay 
13175615d1e5SSatish Balay #undef __FUNC__
13185615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
13190e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
13200e95ebc0SSatish Balay {
13210e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
13220e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
13230e95ebc0SSatish Balay   int ierr,s1,s2,s3;
13240e95ebc0SSatish Balay 
13250e95ebc0SSatish Balay   if (ll)  {
13260e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
13270e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1328e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
13290e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
13300e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
13310e95ebc0SSatish Balay   }
1332e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
13330e95ebc0SSatish Balay   return 0;
13340e95ebc0SSatish Balay }
13350e95ebc0SSatish Balay 
13360ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
13370ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
13380ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
13390ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
13400ac07820SSatish Balay    routine.
13410ac07820SSatish Balay */
13425615d1e5SSatish Balay #undef __FUNC__
13435615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1344ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
13450ac07820SSatish Balay {
13460ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
13470ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
13480ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
13490ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
13500ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
13510ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
13520ac07820SSatish Balay   MPI_Comm       comm = A->comm;
13530ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
13540ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
13550ac07820SSatish Balay   IS             istmp;
13560ac07820SSatish Balay 
13570ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
13580ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
13590ac07820SSatish Balay 
13600ac07820SSatish Balay   /*  first count number of contributors to each processor */
13610ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
13620ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
13630ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
13640ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13650ac07820SSatish Balay     idx = rows[i];
13660ac07820SSatish Balay     found = 0;
13670ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13680ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13690ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13700ac07820SSatish Balay       }
13710ac07820SSatish Balay     }
1372e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13730ac07820SSatish Balay   }
13740ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13750ac07820SSatish Balay 
13760ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13770ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13780ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13790ac07820SSatish Balay   nrecvs = work[rank];
13800ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13810ac07820SSatish Balay   nmax = work[rank];
13820ac07820SSatish Balay   PetscFree(work);
13830ac07820SSatish Balay 
13840ac07820SSatish Balay   /* post receives:   */
13850ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13860ac07820SSatish Balay   CHKPTRQ(rvalues);
13870ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13880ac07820SSatish Balay   CHKPTRQ(recv_waits);
13890ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13900ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13910ac07820SSatish Balay   }
13920ac07820SSatish Balay 
13930ac07820SSatish Balay   /* do sends:
13940ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13950ac07820SSatish Balay          the ith processor
13960ac07820SSatish Balay   */
13970ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13980ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13990ac07820SSatish Balay   CHKPTRQ(send_waits);
14000ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
14010ac07820SSatish Balay   starts[0] = 0;
14020ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
14030ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
14040ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
14050ac07820SSatish Balay   }
14060ac07820SSatish Balay   ISRestoreIndices(is,&rows);
14070ac07820SSatish Balay 
14080ac07820SSatish Balay   starts[0] = 0;
14090ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
14100ac07820SSatish Balay   count = 0;
14110ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
14120ac07820SSatish Balay     if (procs[i]) {
14130ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
14140ac07820SSatish Balay     }
14150ac07820SSatish Balay   }
14160ac07820SSatish Balay   PetscFree(starts);
14170ac07820SSatish Balay 
14180ac07820SSatish Balay   base = owners[rank]*bs;
14190ac07820SSatish Balay 
14200ac07820SSatish Balay   /*  wait on receives */
14210ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
14220ac07820SSatish Balay   source = lens + nrecvs;
14230ac07820SSatish Balay   count  = nrecvs; slen = 0;
14240ac07820SSatish Balay   while (count) {
14250ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
14260ac07820SSatish Balay     /* unpack receives into our local space */
14270ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
14280ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
14290ac07820SSatish Balay     lens[imdex]  = n;
14300ac07820SSatish Balay     slen += n;
14310ac07820SSatish Balay     count--;
14320ac07820SSatish Balay   }
14330ac07820SSatish Balay   PetscFree(recv_waits);
14340ac07820SSatish Balay 
14350ac07820SSatish Balay   /* move the data into the send scatter */
14360ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
14370ac07820SSatish Balay   count = 0;
14380ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
14390ac07820SSatish Balay     values = rvalues + i*nmax;
14400ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
14410ac07820SSatish Balay       lrows[count++] = values[j] - base;
14420ac07820SSatish Balay     }
14430ac07820SSatish Balay   }
14440ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
14450ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
14460ac07820SSatish Balay 
14470ac07820SSatish Balay   /* actually zap the local rows */
1448029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
14490ac07820SSatish Balay   PLogObjectParent(A,istmp);
14500ac07820SSatish Balay   PetscFree(lrows);
14510ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
14520ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
14530ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
14540ac07820SSatish Balay 
14550ac07820SSatish Balay   /* wait on sends */
14560ac07820SSatish Balay   if (nsends) {
14570ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
14580ac07820SSatish Balay     CHKPTRQ(send_status);
14590ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
14600ac07820SSatish Balay     PetscFree(send_status);
14610ac07820SSatish Balay   }
14620ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
14630ac07820SSatish Balay 
14640ac07820SSatish Balay   return 0;
14650ac07820SSatish Balay }
1466ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14675615d1e5SSatish Balay #undef __FUNC__
14685615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1469ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1470ba4ca20aSSatish Balay {
1471ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1472ba4ca20aSSatish Balay 
1473ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1474ba4ca20aSSatish Balay   else return 0;
1475ba4ca20aSSatish Balay }
14760ac07820SSatish Balay 
14775615d1e5SSatish Balay #undef __FUNC__
14785615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1479ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1480bb5a7306SBarry Smith {
1481bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1482bb5a7306SBarry Smith   int         ierr;
1483bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1484bb5a7306SBarry Smith   return 0;
1485bb5a7306SBarry Smith }
1486bb5a7306SBarry Smith 
14870ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14880ac07820SSatish Balay 
148979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
149079bdfe76SSatish Balay static struct _MatOps MatOps = {
1491bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14924c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14934c50302cSBarry Smith   0,0,0,0,
14940ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14950e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
149658667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14974c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14984c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14994c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
150094a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1501d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1502ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1503bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1504ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
150579bdfe76SSatish Balay 
150679bdfe76SSatish Balay 
15075615d1e5SSatish Balay #undef __FUNC__
15085615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
150979bdfe76SSatish Balay /*@C
151079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
151179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
151279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
151379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
151479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
151579bdfe76SSatish Balay 
151679bdfe76SSatish Balay    Input Parameters:
151779bdfe76SSatish Balay .  comm - MPI communicator
151879bdfe76SSatish Balay .  bs   - size of blockk
151979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
152092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
152192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
152292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
152392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
152492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
152579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
152692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
152779bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
152879bdfe76SSatish Balay            submatrix  (same for all local rows)
152992e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
153092e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
153192e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
153292e8d321SLois Curfman McInnes            it is zero.
153392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
153479bdfe76SSatish Balay            submatrix (same for all local rows).
153592e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
153692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
153792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
153879bdfe76SSatish Balay 
153979bdfe76SSatish Balay    Output Parameter:
154079bdfe76SSatish Balay .  A - the matrix
154179bdfe76SSatish Balay 
154279bdfe76SSatish Balay    Notes:
154379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
154479bdfe76SSatish Balay    (possibly both).
154579bdfe76SSatish Balay 
154679bdfe76SSatish Balay    Storage Information:
154779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
154879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
154979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
155079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
155179bdfe76SSatish Balay 
155279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
155379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
155479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
155579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
155679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
155779bdfe76SSatish Balay 
155879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
155979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
156079bdfe76SSatish Balay 
156179bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
156279bdfe76SSatish Balay $         -------------------
156379bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
156479bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
156579bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
156679bdfe76SSatish Balay $         -------------------
156779bdfe76SSatish Balay $
156879bdfe76SSatish Balay 
156979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
157079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
157179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
157257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
157379bdfe76SSatish Balay 
157479bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
157579bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
157679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
157792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
157892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15796da5968aSLois Curfman McInnes    matrices.
158079bdfe76SSatish Balay 
158192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
158279bdfe76SSatish Balay 
158379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
158479bdfe76SSatish Balay @*/
158579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
158679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
158779bdfe76SSatish Balay {
158879bdfe76SSatish Balay   Mat          B;
158979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15904c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
159179bdfe76SSatish Balay 
1592e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
159379bdfe76SSatish Balay   *A = 0;
1594f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
159579bdfe76SSatish Balay   PLogObjectCreate(B);
159679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
159779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
159879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15994c50302cSBarry Smith   MPI_Comm_size(comm,&size);
16004c50302cSBarry Smith   if (size == 1) {
16014c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
16024c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
16034c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
16044c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
16054c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
16064c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
16074c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
16084c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
16094c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
16104c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
16114c50302cSBarry Smith   }
16124c50302cSBarry Smith 
161379bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
161479bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
161590f02eecSBarry Smith   B->mapping    = 0;
161679bdfe76SSatish Balay   B->factor     = 0;
161779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
161879bdfe76SSatish Balay 
1619e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
162079bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
162179bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
162279bdfe76SSatish Balay 
162379bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1624e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1625e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1626e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1627cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1628cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
162979bdfe76SSatish Balay 
163079bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
163179bdfe76SSatish Balay     work[0] = m; work[1] = n;
163279bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
163379bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
163479bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
163579bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
163679bdfe76SSatish Balay   }
163779bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
163879bdfe76SSatish Balay     Mbs = M/bs;
1639e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
164079bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
164179bdfe76SSatish Balay     m   = mbs*bs;
164279bdfe76SSatish Balay   }
164379bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
164479bdfe76SSatish Balay     Nbs = N/bs;
1645e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
164679bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
164779bdfe76SSatish Balay     n   = nbs*bs;
164879bdfe76SSatish Balay   }
1649e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
165079bdfe76SSatish Balay 
165179bdfe76SSatish Balay   b->m = m; B->m = m;
165279bdfe76SSatish Balay   b->n = n; B->n = n;
165379bdfe76SSatish Balay   b->N = N; B->N = N;
165479bdfe76SSatish Balay   b->M = M; B->M = M;
165579bdfe76SSatish Balay   b->bs  = bs;
165679bdfe76SSatish Balay   b->bs2 = bs*bs;
165779bdfe76SSatish Balay   b->mbs = mbs;
165879bdfe76SSatish Balay   b->nbs = nbs;
165979bdfe76SSatish Balay   b->Mbs = Mbs;
166079bdfe76SSatish Balay   b->Nbs = Nbs;
166179bdfe76SSatish Balay 
166279bdfe76SSatish Balay   /* build local table of row and column ownerships */
166379bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1664f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16650ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
166679bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
166779bdfe76SSatish Balay   b->rowners[0] = 0;
166879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
166979bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
167079bdfe76SSatish Balay   }
167179bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
167279bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16734fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16744fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16754fa0d573SSatish Balay 
167679bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
167779bdfe76SSatish Balay   b->cowners[0] = 0;
167879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
167979bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
168079bdfe76SSatish Balay   }
168179bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
168279bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16834fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16844fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
168579bdfe76SSatish Balay 
168679bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1687029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
168879bdfe76SSatish Balay   PLogObjectParent(B,b->A);
168979bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1690029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
169179bdfe76SSatish Balay   PLogObjectParent(B,b->B);
169279bdfe76SSatish Balay 
169379bdfe76SSatish Balay   /* build cache for off array entries formed */
169479bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
169590f02eecSBarry Smith   b->donotstash  = 0;
169679bdfe76SSatish Balay   b->colmap      = 0;
169779bdfe76SSatish Balay   b->garray      = 0;
169879bdfe76SSatish Balay   b->roworiented = 1;
169979bdfe76SSatish Balay 
170030793edcSSatish Balay   /* stuff used in block assembly */
170130793edcSSatish Balay   b->barray      = 0;
170230793edcSSatish Balay 
170379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
170479bdfe76SSatish Balay   b->lvec        = 0;
170579bdfe76SSatish Balay   b->Mvctx       = 0;
170679bdfe76SSatish Balay 
170779bdfe76SSatish Balay   /* stuff for MatGetRow() */
170879bdfe76SSatish Balay   b->rowindices   = 0;
170979bdfe76SSatish Balay   b->rowvalues    = 0;
171079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
171179bdfe76SSatish Balay 
171279bdfe76SSatish Balay   *A = B;
171379bdfe76SSatish Balay   return 0;
171479bdfe76SSatish Balay }
1715026e39d0SSatish Balay 
17165615d1e5SSatish Balay #undef __FUNC__
17175615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
17180ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
17190ac07820SSatish Balay {
17200ac07820SSatish Balay   Mat         mat;
17210ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
17220ac07820SSatish Balay   int         ierr, len=0, flg;
17230ac07820SSatish Balay 
17240ac07820SSatish Balay   *newmat       = 0;
1725f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
17260ac07820SSatish Balay   PLogObjectCreate(mat);
17270ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
17280ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
17290ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
17300ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
17310ac07820SSatish Balay   mat->factor     = matin->factor;
17320ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
17330ac07820SSatish Balay 
17340ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
17350ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
17360ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
17370ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
17380ac07820SSatish Balay 
17390ac07820SSatish Balay   a->bs  = oldmat->bs;
17400ac07820SSatish Balay   a->bs2 = oldmat->bs2;
17410ac07820SSatish Balay   a->mbs = oldmat->mbs;
17420ac07820SSatish Balay   a->nbs = oldmat->nbs;
17430ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
17440ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
17450ac07820SSatish Balay 
17460ac07820SSatish Balay   a->rstart       = oldmat->rstart;
17470ac07820SSatish Balay   a->rend         = oldmat->rend;
17480ac07820SSatish Balay   a->cstart       = oldmat->cstart;
17490ac07820SSatish Balay   a->cend         = oldmat->cend;
17500ac07820SSatish Balay   a->size         = oldmat->size;
17510ac07820SSatish Balay   a->rank         = oldmat->rank;
1752e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
17530ac07820SSatish Balay   a->rowvalues    = 0;
17540ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
175530793edcSSatish Balay   a->barray       = 0;
17560ac07820SSatish Balay 
17570ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1758f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
17590ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
17600ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
17610ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
17620ac07820SSatish Balay   if (oldmat->colmap) {
17630ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
17640ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17650ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17660ac07820SSatish Balay   } else a->colmap = 0;
17670ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17680ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17690ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17700ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17710ac07820SSatish Balay   } else a->garray = 0;
17720ac07820SSatish Balay 
17730ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17740ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17750ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17760ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17770ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17780ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17790ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17800ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17810ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17820ac07820SSatish Balay   if (flg) {
17830ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17840ac07820SSatish Balay   }
17850ac07820SSatish Balay   *newmat = mat;
17860ac07820SSatish Balay   return 0;
17870ac07820SSatish Balay }
178857b952d6SSatish Balay 
178957b952d6SSatish Balay #include "sys.h"
179057b952d6SSatish Balay 
17915615d1e5SSatish Balay #undef __FUNC__
17925615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
179357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
179457b952d6SSatish Balay {
179557b952d6SSatish Balay   Mat          A;
179657b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
179757b952d6SSatish Balay   Scalar       *vals,*buf;
179857b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
179957b952d6SSatish Balay   MPI_Status   status;
1800cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
180157b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
180257b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
180357b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
180457b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
180557b952d6SSatish Balay 
180657b952d6SSatish Balay 
180757b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
180857b952d6SSatish Balay   bs2  = bs*bs;
180957b952d6SSatish Balay 
181057b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
181157b952d6SSatish Balay   if (!rank) {
181257b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
181357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1814e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
181557b952d6SSatish Balay   }
181657b952d6SSatish Balay 
181757b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
181857b952d6SSatish Balay   M = header[1]; N = header[2];
181957b952d6SSatish Balay 
1820e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
182157b952d6SSatish Balay 
182257b952d6SSatish Balay   /*
182357b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
182457b952d6SSatish Balay      divisible by the blocksize
182557b952d6SSatish Balay   */
182657b952d6SSatish Balay   Mbs        = M/bs;
182757b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
182857b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
182957b952d6SSatish Balay   else                  Mbs++;
183057b952d6SSatish Balay   if (extra_rows &&!rank) {
1831b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
183257b952d6SSatish Balay   }
1833537820f0SBarry Smith 
183457b952d6SSatish Balay   /* determine ownership of all rows */
183557b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
183657b952d6SSatish Balay   m   = mbs * bs;
1837cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1838cee3aa6bSSatish Balay   browners = rowners + size + 1;
183957b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
184057b952d6SSatish Balay   rowners[0] = 0;
1841cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1842cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
184357b952d6SSatish Balay   rstart = rowners[rank];
184457b952d6SSatish Balay   rend   = rowners[rank+1];
184557b952d6SSatish Balay 
184657b952d6SSatish Balay   /* distribute row lengths to all processors */
184757b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
184857b952d6SSatish Balay   if (!rank) {
184957b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
185057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
185157b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
185257b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1853cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1854cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
185557b952d6SSatish Balay     PetscFree(sndcounts);
185657b952d6SSatish Balay   }
185757b952d6SSatish Balay   else {
185857b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
185957b952d6SSatish Balay   }
186057b952d6SSatish Balay 
186157b952d6SSatish Balay   if (!rank) {
186257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
186357b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
186457b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
186557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
186657b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
186757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
186857b952d6SSatish Balay       }
186957b952d6SSatish Balay     }
187057b952d6SSatish Balay     PetscFree(rowlengths);
187157b952d6SSatish Balay 
187257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
187357b952d6SSatish Balay     maxnz = 0;
187457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
187557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
187657b952d6SSatish Balay     }
187757b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
187857b952d6SSatish Balay 
187957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
188057b952d6SSatish Balay     nz = procsnz[0];
188157b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
188257b952d6SSatish Balay     mycols = ibuf;
1883cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
188457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1885cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1886cee3aa6bSSatish Balay 
188757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
188857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
188957b952d6SSatish Balay       nz = procsnz[i];
189057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
189157b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
189257b952d6SSatish Balay     }
189357b952d6SSatish Balay     /* read in the stuff for the last proc */
189457b952d6SSatish Balay     if ( size != 1 ) {
189557b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
189657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
189757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1898cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
189957b952d6SSatish Balay     }
190057b952d6SSatish Balay     PetscFree(cols);
190157b952d6SSatish Balay   }
190257b952d6SSatish Balay   else {
190357b952d6SSatish Balay     /* determine buffer space needed for message */
190457b952d6SSatish Balay     nz = 0;
190557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
190657b952d6SSatish Balay       nz += locrowlens[i];
190757b952d6SSatish Balay     }
190857b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
190957b952d6SSatish Balay     mycols = ibuf;
191057b952d6SSatish Balay     /* receive message of column indices*/
191157b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
191257b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1913e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
191457b952d6SSatish Balay   }
191557b952d6SSatish Balay 
191657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1917cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1918cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
191957b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1920cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
192157b952d6SSatish Balay   masked1 = mask    + Mbs;
192257b952d6SSatish Balay   masked2 = masked1 + Mbs;
192357b952d6SSatish Balay   rowcount = 0; nzcount = 0;
192457b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
192557b952d6SSatish Balay     dcount  = 0;
192657b952d6SSatish Balay     odcount = 0;
192757b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
192857b952d6SSatish Balay       kmax = locrowlens[rowcount];
192957b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
193057b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
193157b952d6SSatish Balay         if (!mask[tmp]) {
193257b952d6SSatish Balay           mask[tmp] = 1;
193357b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
193457b952d6SSatish Balay           else masked1[dcount++] = tmp;
193557b952d6SSatish Balay         }
193657b952d6SSatish Balay       }
193757b952d6SSatish Balay       rowcount++;
193857b952d6SSatish Balay     }
1939cee3aa6bSSatish Balay 
194057b952d6SSatish Balay     dlens[i]  = dcount;
194157b952d6SSatish Balay     odlens[i] = odcount;
1942cee3aa6bSSatish Balay 
194357b952d6SSatish Balay     /* zero out the mask elements we set */
194457b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
194557b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
194657b952d6SSatish Balay   }
1947cee3aa6bSSatish Balay 
194857b952d6SSatish Balay   /* create our matrix */
1949537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1950537820f0SBarry Smith          CHKERRQ(ierr);
195157b952d6SSatish Balay   A = *newmat;
19526d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
195357b952d6SSatish Balay 
195457b952d6SSatish Balay   if (!rank) {
195557b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
195657b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
195757b952d6SSatish Balay     nz = procsnz[0];
195857b952d6SSatish Balay     vals = buf;
1959cee3aa6bSSatish Balay     mycols = ibuf;
1960cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
196157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1962cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1963537820f0SBarry Smith 
196457b952d6SSatish Balay     /* insert into matrix */
196557b952d6SSatish Balay     jj      = rstart*bs;
196657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
196757b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
196857b952d6SSatish Balay       mycols += locrowlens[i];
196957b952d6SSatish Balay       vals   += locrowlens[i];
197057b952d6SSatish Balay       jj++;
197157b952d6SSatish Balay     }
197257b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
197357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
197457b952d6SSatish Balay       nz = procsnz[i];
197557b952d6SSatish Balay       vals = buf;
197657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
197757b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
197857b952d6SSatish Balay     }
197957b952d6SSatish Balay     /* the last proc */
198057b952d6SSatish Balay     if ( size != 1 ){
198157b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1982cee3aa6bSSatish Balay       vals = buf;
198357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
198457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1985cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
198657b952d6SSatish Balay     }
198757b952d6SSatish Balay     PetscFree(procsnz);
198857b952d6SSatish Balay   }
198957b952d6SSatish Balay   else {
199057b952d6SSatish Balay     /* receive numeric values */
199157b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
199257b952d6SSatish Balay 
199357b952d6SSatish Balay     /* receive message of values*/
199457b952d6SSatish Balay     vals = buf;
1995cee3aa6bSSatish Balay     mycols = ibuf;
199657b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
199757b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1998e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
199957b952d6SSatish Balay 
200057b952d6SSatish Balay     /* insert into matrix */
200157b952d6SSatish Balay     jj      = rstart*bs;
2002cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
200357b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
200457b952d6SSatish Balay       mycols += locrowlens[i];
200557b952d6SSatish Balay       vals   += locrowlens[i];
200657b952d6SSatish Balay       jj++;
200757b952d6SSatish Balay     }
200857b952d6SSatish Balay   }
200957b952d6SSatish Balay   PetscFree(locrowlens);
201057b952d6SSatish Balay   PetscFree(buf);
201157b952d6SSatish Balay   PetscFree(ibuf);
201257b952d6SSatish Balay   PetscFree(rowners);
201357b952d6SSatish Balay   PetscFree(dlens);
2014cee3aa6bSSatish Balay   PetscFree(mask);
20156d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
20166d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
201757b952d6SSatish Balay   return 0;
201857b952d6SSatish Balay }
201957b952d6SSatish Balay 
202057b952d6SSatish Balay 
2021