xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 83e2fdc756c13f964f3ed5aaff42e3626c5b72bc)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*83e2fdc7SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.74 1997/07/28 16:13:24 bsmith Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
9589280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
9611ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10211ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
10389280ab3SLois Curfman McInnes  \
10480c1aa95SSatish Balay         /* malloc new storage space */ \
10580c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10680c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10780c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10880c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10980c1aa95SSatish Balay  \
11080c1aa95SSatish Balay         /* copy over old data into new slots */ \
11180c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11280c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11380c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11480c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11580c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11680c1aa95SSatish Balay                                                            len*sizeof(int)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11880c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11980c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
12080c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
12180c1aa95SSatish Balay         /* free up old matrix storage */ \
12280c1aa95SSatish Balay         PetscFree(a->a);  \
12380c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12580c1aa95SSatish Balay         a->singlemalloc = 1; \
12680c1aa95SSatish Balay  \
12780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12980c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
13180c1aa95SSatish Balay         a->reallocs++; \
13280c1aa95SSatish Balay         a->nz++; \
13380c1aa95SSatish Balay       } \
13480c1aa95SSatish Balay       N = nrow++ - 1;  \
13580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13680c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13880c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13980c1aa95SSatish Balay       } \
14080c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
14180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
143ac7a638eSSatish Balay       a_noinsert:; \
14480c1aa95SSatish Balay     ailen[brow] = nrow; \
14580c1aa95SSatish Balay }
14657b952d6SSatish Balay 
147ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148ac7a638eSSatish Balay { \
149ac7a638eSSatish Balay  \
150ac7a638eSSatish Balay     brow = row/bs;  \
151ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
153ac7a638eSSatish Balay       bcol = col/bs; \
154ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
155ac7a638eSSatish Balay       low = 0; high = nrow; \
156ac7a638eSSatish Balay       while (high-low > 3) { \
157ac7a638eSSatish Balay         t = (low+high)/2; \
158ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
159ac7a638eSSatish Balay         else              low  = t; \
160ac7a638eSSatish Balay       } \
161ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
162ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
163ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
164ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
166ac7a638eSSatish Balay           else                    *bap  = value;  \
167ac7a638eSSatish Balay           goto b_noinsert; \
168ac7a638eSSatish Balay         } \
169ac7a638eSSatish Balay       } \
17089280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
17111ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
172ac7a638eSSatish Balay       if (nrow >= rmax) { \
173ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17474c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
175ac7a638eSSatish Balay         Scalar *new_a; \
176ac7a638eSSatish Balay  \
17711ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
17889280ab3SLois Curfman McInnes  \
179ac7a638eSSatish Balay         /* malloc new storage space */ \
18074c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
181ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
182ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
183ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
184ac7a638eSSatish Balay  \
185ac7a638eSSatish Balay         /* copy over old data into new slots */ \
186ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18774c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
188ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
189ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
190ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
191ac7a638eSSatish Balay                                                            len*sizeof(int)); \
192ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
193ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
194ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
195ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
196ac7a638eSSatish Balay         /* free up old matrix storage */ \
19774c639caSSatish Balay         PetscFree(b->a);  \
19874c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
20074c639caSSatish Balay         b->singlemalloc = 1; \
201ac7a638eSSatish Balay  \
202ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
20474c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
20574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20674c639caSSatish Balay         b->reallocs++; \
20774c639caSSatish Balay         b->nz++; \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       N = nrow++ - 1;  \
210ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
211ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
212ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
213ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
214ac7a638eSSatish Balay       } \
215ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
216ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
217ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
218ac7a638eSSatish Balay       b_noinsert:; \
219ac7a638eSSatish Balay     bilen[brow] = nrow; \
220ac7a638eSSatish Balay }
221ac7a638eSSatish Balay 
2225615d1e5SSatish Balay #undef __FUNC__
2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
22557b952d6SSatish Balay {
22657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22757b952d6SSatish Balay   Scalar      value;
2284fa0d573SSatish Balay   int         ierr,i,j,row,col;
2294fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2304fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2314fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
23257b952d6SSatish Balay 
233eada6651SSatish Balay   /* Some Variables required in the macro */
23480c1aa95SSatish Balay   Mat         A = baij->A;
23580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
236ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
237ac7a638eSSatish Balay   Scalar      *aa=a->a;
238ac7a638eSSatish Balay 
239ac7a638eSSatish Balay   Mat         B = baij->B;
240ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
241ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
242ac7a638eSSatish Balay   Scalar      *ba=b->a;
243ac7a638eSSatish Balay 
244ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
245ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
246ac7a638eSSatish Balay   Scalar      *ap,*bap;
24780c1aa95SSatish Balay 
24857b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
250e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
251e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
252639f9d9dSBarry Smith #endif
25357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
25457b952d6SSatish Balay       row = im[i] - rstart_orig;
25557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25757b952d6SSatish Balay           col = in[j] - cstart_orig;
25857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
26080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26157b952d6SSatish Balay         }
262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
263e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay         else {
26757b952d6SSatish Balay           if (mat->was_assembled) {
268905e6a2fSBarry Smith             if (!baij->colmap) {
269905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270905e6a2fSBarry Smith             }
271905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
27257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
27357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
27457b952d6SSatish Balay               col =  in[j];
2759bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2769bf004c3SSatish Balay               B = baij->B;
2779bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2789bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2799bf004c3SSatish Balay               ba=b->a;
28057b952d6SSatish Balay             }
28157b952d6SSatish Balay           }
28257b952d6SSatish Balay           else col = in[j];
28357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
284ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
285ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
28657b952d6SSatish Balay         }
28757b952d6SSatish Balay       }
28857b952d6SSatish Balay     }
28957b952d6SSatish Balay     else {
29090f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
29157b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
29257b952d6SSatish Balay       }
29357b952d6SSatish Balay       else {
29490f02eecSBarry Smith         if (!baij->donotstash) {
29557b952d6SSatish Balay           row = im[i];
29657b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
29757b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
29857b952d6SSatish Balay           }
29957b952d6SSatish Balay         }
30057b952d6SSatish Balay       }
30157b952d6SSatish Balay     }
30290f02eecSBarry Smith   }
30357b952d6SSatish Balay   return 0;
30457b952d6SSatish Balay }
30557b952d6SSatish Balay 
306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
307ab26458aSBarry Smith #undef __FUNC__
308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
310ab26458aSBarry Smith {
311ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
313abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
314ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
315ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
316ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
317ab26458aSBarry Smith 
31830793edcSSatish Balay 
31930793edcSSatish Balay   if(!barray) {
32047513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
32130793edcSSatish Balay   }
32230793edcSSatish Balay 
323ab26458aSBarry Smith   if (roworiented) {
324ab26458aSBarry Smith     stepval = (n-1)*bs;
325ab26458aSBarry Smith   } else {
326ab26458aSBarry Smith     stepval = (m-1)*bs;
327ab26458aSBarry Smith   }
328ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
329ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
330ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
331ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
332ab26458aSBarry Smith #endif
333ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
334ab26458aSBarry Smith       row = im[i] - rstart;
335ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
33615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
33715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
33815b57d14SSatish Balay           barray = v + i*bs2;
33915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
34015b57d14SSatish Balay           barray = v + j*bs2;
34115b57d14SSatish Balay         } else { /* Here a copy is required */
342ab26458aSBarry Smith           if (roworiented) {
343ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
344ab26458aSBarry Smith           } else {
345ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
346abef11f7SSatish Balay           }
34747513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
34847513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
34930793edcSSatish Balay               *barray++  = *value++;
35047513183SBarry Smith             }
35147513183SBarry Smith           }
35230793edcSSatish Balay           barray -=bs2;
35315b57d14SSatish Balay         }
354abef11f7SSatish Balay 
355abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
356abef11f7SSatish Balay           col  = in[j] - cstart;
35730793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
358ab26458aSBarry Smith         }
359ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
360ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
36147513183SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Column too large");}
362ab26458aSBarry Smith #endif
363ab26458aSBarry Smith         else {
364ab26458aSBarry Smith           if (mat->was_assembled) {
365ab26458aSBarry Smith             if (!baij->colmap) {
366ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
367ab26458aSBarry Smith             }
368a5eb4965SSatish Balay 
369a5eb4965SSatish Balay #if defined(PETSC_BOPT_g)
370a5eb4965SSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(1,0,"Incorrect colmap");}
371a5eb4965SSatish Balay #endif
372a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
373ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
374ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
375ab26458aSBarry Smith               col =  in[j];
376ab26458aSBarry Smith             }
377ab26458aSBarry Smith           }
378ab26458aSBarry Smith           else col = in[j];
37930793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
380ab26458aSBarry Smith         }
381ab26458aSBarry Smith       }
382ab26458aSBarry Smith     }
383ab26458aSBarry Smith     else {
384ab26458aSBarry Smith       if (!baij->donotstash) {
385ab26458aSBarry Smith         if (roworiented ) {
386abef11f7SSatish Balay           row   = im[i]*bs;
387abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
388abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
389abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
390abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
391abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
392abef11f7SSatish Balay               }
393ab26458aSBarry Smith             }
394ab26458aSBarry Smith           }
395ab26458aSBarry Smith         }
396ab26458aSBarry Smith         else {
397ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
398abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
399abef11f7SSatish Balay             col   = in[j]*bs;
400abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
401abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
402abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
403ab26458aSBarry Smith               }
404ab26458aSBarry Smith             }
405ab26458aSBarry Smith           }
406abef11f7SSatish Balay         }
407abef11f7SSatish Balay       }
408ab26458aSBarry Smith     }
409ab26458aSBarry Smith   }
410ab26458aSBarry Smith   return 0;
411ab26458aSBarry Smith }
412ab26458aSBarry Smith 
4135615d1e5SSatish Balay #undef __FUNC__
4145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
415ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
416d6de1c52SSatish Balay {
417d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
418d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
419d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
420d6de1c52SSatish Balay 
421d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
422e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
423e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
424d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
425d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
426d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
427e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
428e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
429d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
430d6de1c52SSatish Balay           col = idxn[j] - bscstart;
431d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
432d6de1c52SSatish Balay         }
433d6de1c52SSatish Balay         else {
434905e6a2fSBarry Smith           if (!baij->colmap) {
435905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
436905e6a2fSBarry Smith           }
437e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
438dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
439d9d09a02SSatish Balay           else {
440dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
441d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
442d6de1c52SSatish Balay           }
443d6de1c52SSatish Balay         }
444d6de1c52SSatish Balay       }
445d9d09a02SSatish Balay     }
446d6de1c52SSatish Balay     else {
447e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
448d6de1c52SSatish Balay     }
449d6de1c52SSatish Balay   }
450d6de1c52SSatish Balay   return 0;
451d6de1c52SSatish Balay }
452d6de1c52SSatish Balay 
4535615d1e5SSatish Balay #undef __FUNC__
4545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
455ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
456d6de1c52SSatish Balay {
457d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
458d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
459acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
460d6de1c52SSatish Balay   double     sum = 0.0;
461d6de1c52SSatish Balay   Scalar     *v;
462d6de1c52SSatish Balay 
463d6de1c52SSatish Balay   if (baij->size == 1) {
464d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
465d6de1c52SSatish Balay   } else {
466d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
467d6de1c52SSatish Balay       v = amat->a;
468d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
469d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
470d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
471d6de1c52SSatish Balay #else
472d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
473d6de1c52SSatish Balay #endif
474d6de1c52SSatish Balay       }
475d6de1c52SSatish Balay       v = bmat->a;
476d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
477d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
478d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
479d6de1c52SSatish Balay #else
480d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
481d6de1c52SSatish Balay #endif
482d6de1c52SSatish Balay       }
483d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
484d6de1c52SSatish Balay       *norm = sqrt(*norm);
485d6de1c52SSatish Balay     }
486acdf5bf4SSatish Balay     else
487e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
488d6de1c52SSatish Balay   }
489d6de1c52SSatish Balay   return 0;
490d6de1c52SSatish Balay }
49157b952d6SSatish Balay 
4925615d1e5SSatish Balay #undef __FUNC__
4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
494ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
49557b952d6SSatish Balay {
49657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
49757b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
49857b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
49957b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
50057b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
50157b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
50257b952d6SSatish Balay   InsertMode  addv;
50357b952d6SSatish Balay   Scalar      *rvalues,*svalues;
50457b952d6SSatish Balay 
50557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
506e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
50757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
508e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
50957b952d6SSatish Balay   }
510e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
51157b952d6SSatish Balay 
51257b952d6SSatish Balay   /*  first count number of contributors to each processor */
51357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
51457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
51557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
51657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
51757b952d6SSatish Balay     idx = baij->stash.idx[i];
51857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
51957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
52057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
52157b952d6SSatish Balay       }
52257b952d6SSatish Balay     }
52357b952d6SSatish Balay   }
52457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
52557b952d6SSatish Balay 
52657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
52757b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
52857b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
52957b952d6SSatish Balay   nreceives = work[rank];
53057b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
53157b952d6SSatish Balay   nmax = work[rank];
53257b952d6SSatish Balay   PetscFree(work);
53357b952d6SSatish Balay 
53457b952d6SSatish Balay   /* post receives:
53557b952d6SSatish Balay        1) each message will consist of ordered pairs
53657b952d6SSatish Balay      (global index,value) we store the global index as a double
53757b952d6SSatish Balay      to simplify the message passing.
53857b952d6SSatish Balay        2) since we don't know how long each individual message is we
53957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
54057b952d6SSatish Balay      this is a lot of wasted space.
54157b952d6SSatish Balay 
54257b952d6SSatish Balay 
54357b952d6SSatish Balay        This could be done better.
54457b952d6SSatish Balay   */
54557b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
54657b952d6SSatish Balay   CHKPTRQ(rvalues);
54757b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
54857b952d6SSatish Balay   CHKPTRQ(recv_waits);
54957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
55057b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
55157b952d6SSatish Balay               comm,recv_waits+i);
55257b952d6SSatish Balay   }
55357b952d6SSatish Balay 
55457b952d6SSatish Balay   /* do sends:
55557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
55657b952d6SSatish Balay          the ith processor
55757b952d6SSatish Balay   */
55857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
55957b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
56057b952d6SSatish Balay   CHKPTRQ(send_waits);
56157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
56257b952d6SSatish Balay   starts[0] = 0;
56357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
56557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
56657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
56757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
56857b952d6SSatish Balay   }
56957b952d6SSatish Balay   PetscFree(owner);
57057b952d6SSatish Balay   starts[0] = 0;
57157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
57257b952d6SSatish Balay   count = 0;
57357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
57457b952d6SSatish Balay     if (procs[i]) {
57557b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
57657b952d6SSatish Balay                 comm,send_waits+count++);
57757b952d6SSatish Balay     }
57857b952d6SSatish Balay   }
57957b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
58057b952d6SSatish Balay 
58157b952d6SSatish Balay   /* Free cache space */
582d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
58357b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
58457b952d6SSatish Balay 
58557b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
58657b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
58757b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
58857b952d6SSatish Balay   baij->rmax       = nmax;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   return 0;
59157b952d6SSatish Balay }
59257b952d6SSatish Balay 
59357b952d6SSatish Balay 
5945615d1e5SSatish Balay #undef __FUNC__
5955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
596ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
59757b952d6SSatish Balay {
59857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
59957b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
60057b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
60157b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
60257b952d6SSatish Balay   Scalar      *values,val;
603e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
60457b952d6SSatish Balay 
60557b952d6SSatish Balay   /*  wait on receives */
60657b952d6SSatish Balay   while (count) {
60757b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
60857b952d6SSatish Balay     /* unpack receives into our local space */
60957b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
61057b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
61157b952d6SSatish Balay     n = n/3;
61257b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
61357b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
61457b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
61557b952d6SSatish Balay       val = values[3*i+2];
61657b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
61757b952d6SSatish Balay         col -= baij->cstart*bs;
6186fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
61957b952d6SSatish Balay       }
62057b952d6SSatish Balay       else {
62157b952d6SSatish Balay         if (mat->was_assembled) {
622905e6a2fSBarry Smith           if (!baij->colmap) {
623905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
624905e6a2fSBarry Smith           }
625a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
62657b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
62757b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
62857b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
62957b952d6SSatish Balay           }
63057b952d6SSatish Balay         }
6316fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
63257b952d6SSatish Balay       }
63357b952d6SSatish Balay     }
63457b952d6SSatish Balay     count--;
63557b952d6SSatish Balay   }
63657b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
63757b952d6SSatish Balay 
63857b952d6SSatish Balay   /* wait on sends */
63957b952d6SSatish Balay   if (baij->nsends) {
64057b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
64157b952d6SSatish Balay     CHKPTRQ(send_status);
64257b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
64357b952d6SSatish Balay     PetscFree(send_status);
64457b952d6SSatish Balay   }
64557b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
64657b952d6SSatish Balay 
64757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
64857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
64957b952d6SSatish Balay 
65057b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
65157b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
65257b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
65357b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
65457b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65557b952d6SSatish Balay   }
65657b952d6SSatish Balay 
6576d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
65857b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
65957b952d6SSatish Balay   }
66057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
66157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
66257b952d6SSatish Balay 
66357b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
66457b952d6SSatish Balay   return 0;
66557b952d6SSatish Balay }
66657b952d6SSatish Balay 
6675615d1e5SSatish Balay #undef __FUNC__
6685615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
66957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
67057b952d6SSatish Balay {
67157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
67257b952d6SSatish Balay   int          ierr;
67357b952d6SSatish Balay 
67457b952d6SSatish Balay   if (baij->size == 1) {
67557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
67657b952d6SSatish Balay   }
677e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
67857b952d6SSatish Balay   return 0;
67957b952d6SSatish Balay }
68057b952d6SSatish Balay 
6815615d1e5SSatish Balay #undef __FUNC__
6825615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
68357b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
68457b952d6SSatish Balay {
68557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
686cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
68757b952d6SSatish Balay   FILE         *fd;
68857b952d6SSatish Balay   ViewerType   vtype;
68957b952d6SSatish Balay 
69057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69157b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
69257b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
693639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6944e220ebcSLois Curfman McInnes       MatInfo info;
69557b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
69657b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6974e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
69857b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
69957b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7004e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7014e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7024e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7034e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7044e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7054e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
70657b952d6SSatish Balay       fflush(fd);
70757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
70857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
70957b952d6SSatish Balay       return 0;
71057b952d6SSatish Balay     }
711639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
712bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
71357b952d6SSatish Balay       return 0;
71457b952d6SSatish Balay     }
71557b952d6SSatish Balay   }
71657b952d6SSatish Balay 
71757b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
71857b952d6SSatish Balay     Draw       draw;
71957b952d6SSatish Balay     PetscTruth isnull;
72057b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
72157b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
72257b952d6SSatish Balay   }
72357b952d6SSatish Balay 
72457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
72557b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
72657b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
72757b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
72857b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
72957b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
73057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
73157b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
73257b952d6SSatish Balay     fflush(fd);
73357b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
73457b952d6SSatish Balay   }
73557b952d6SSatish Balay   else {
73657b952d6SSatish Balay     int size = baij->size;
73757b952d6SSatish Balay     rank = baij->rank;
73857b952d6SSatish Balay     if (size == 1) {
73957b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
74057b952d6SSatish Balay     }
74157b952d6SSatish Balay     else {
74257b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
74357b952d6SSatish Balay       Mat         A;
74457b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
74557b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
74657b952d6SSatish Balay       int         mbs=baij->mbs;
74757b952d6SSatish Balay       Scalar      *a;
74857b952d6SSatish Balay 
74957b952d6SSatish Balay       if (!rank) {
750cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
75157b952d6SSatish Balay         CHKERRQ(ierr);
75257b952d6SSatish Balay       }
75357b952d6SSatish Balay       else {
754cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
75557b952d6SSatish Balay         CHKERRQ(ierr);
75657b952d6SSatish Balay       }
75757b952d6SSatish Balay       PLogObjectParent(mat,A);
75857b952d6SSatish Balay 
75957b952d6SSatish Balay       /* copy over the A part */
76057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
76157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
76257b952d6SSatish Balay       row = baij->rstart;
76357b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
76457b952d6SSatish Balay 
76557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
76657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76957b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
77057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
771cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
772cee3aa6bSSatish Balay             col++; a += bs;
77357b952d6SSatish Balay           }
77457b952d6SSatish Balay         }
77557b952d6SSatish Balay       }
77657b952d6SSatish Balay       /* copy over the B part */
77757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
77857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77957b952d6SSatish Balay       row = baij->rstart*bs;
78057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
78157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
78257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
78357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
78457b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
78557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
786cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
787cee3aa6bSSatish Balay             col++; a += bs;
78857b952d6SSatish Balay           }
78957b952d6SSatish Balay         }
79057b952d6SSatish Balay       }
79157b952d6SSatish Balay       PetscFree(rvals);
7926d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7936d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
79457b952d6SSatish Balay       if (!rank) {
79557b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
79657b952d6SSatish Balay       }
79757b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
79857b952d6SSatish Balay     }
79957b952d6SSatish Balay   }
80057b952d6SSatish Balay   return 0;
80157b952d6SSatish Balay }
80257b952d6SSatish Balay 
80357b952d6SSatish Balay 
80457b952d6SSatish Balay 
8055615d1e5SSatish Balay #undef __FUNC__
8065615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
807ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
80857b952d6SSatish Balay {
80957b952d6SSatish Balay   Mat         mat = (Mat) obj;
81057b952d6SSatish Balay   int         ierr;
81157b952d6SSatish Balay   ViewerType  vtype;
81257b952d6SSatish Balay 
81357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
81457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
81557b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
81657b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
81757b952d6SSatish Balay   }
81857b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
81957b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
82057b952d6SSatish Balay   }
82157b952d6SSatish Balay   return 0;
82257b952d6SSatish Balay }
82357b952d6SSatish Balay 
8245615d1e5SSatish Balay #undef __FUNC__
8255615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
826ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
82779bdfe76SSatish Balay {
82879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
82979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
83079bdfe76SSatish Balay   int         ierr;
83179bdfe76SSatish Balay 
83279bdfe76SSatish Balay #if defined(PETSC_LOG)
83379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
83479bdfe76SSatish Balay #endif
83579bdfe76SSatish Balay 
836*83e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
83779bdfe76SSatish Balay   PetscFree(baij->rowners);
83879bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
83979bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
84079bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
84179bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
84279bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
84379bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
84479bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
84530793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
84679bdfe76SSatish Balay   PetscFree(baij);
84790f02eecSBarry Smith   if (mat->mapping) {
84890f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
84990f02eecSBarry Smith   }
85079bdfe76SSatish Balay   PLogObjectDestroy(mat);
85179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
85279bdfe76SSatish Balay   return 0;
85379bdfe76SSatish Balay }
85479bdfe76SSatish Balay 
8555615d1e5SSatish Balay #undef __FUNC__
8565615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
857ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
858cee3aa6bSSatish Balay {
859cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
86047b4a8eaSLois Curfman McInnes   int         ierr, nt;
861cee3aa6bSSatish Balay 
862c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
86347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
864ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
86547b4a8eaSLois Curfman McInnes   }
866c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
86747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
868e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
86947b4a8eaSLois Curfman McInnes   }
87043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
871cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
87243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
873cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
87443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
875cee3aa6bSSatish Balay   return 0;
876cee3aa6bSSatish Balay }
877cee3aa6bSSatish Balay 
8785615d1e5SSatish Balay #undef __FUNC__
8795615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
880ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
881cee3aa6bSSatish Balay {
882cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
883cee3aa6bSSatish Balay   int        ierr;
88443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
885cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
88643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
887cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
888cee3aa6bSSatish Balay   return 0;
889cee3aa6bSSatish Balay }
890cee3aa6bSSatish Balay 
8915615d1e5SSatish Balay #undef __FUNC__
8925615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
893ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
894cee3aa6bSSatish Balay {
895cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
896cee3aa6bSSatish Balay   int        ierr;
897cee3aa6bSSatish Balay 
898cee3aa6bSSatish Balay   /* do nondiagonal part */
899cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
900cee3aa6bSSatish Balay   /* send it on its way */
901537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
902cee3aa6bSSatish Balay   /* do local part */
903cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
904cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
905cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
906cee3aa6bSSatish Balay   /* but is not perhaps always true. */
907639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
908cee3aa6bSSatish Balay   return 0;
909cee3aa6bSSatish Balay }
910cee3aa6bSSatish Balay 
9115615d1e5SSatish Balay #undef __FUNC__
9125615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
913ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
914cee3aa6bSSatish Balay {
915cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
916cee3aa6bSSatish Balay   int        ierr;
917cee3aa6bSSatish Balay 
918cee3aa6bSSatish Balay   /* do nondiagonal part */
919cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
920cee3aa6bSSatish Balay   /* send it on its way */
921537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
922cee3aa6bSSatish Balay   /* do local part */
923cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
924cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
925cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
926cee3aa6bSSatish Balay   /* but is not perhaps always true. */
927537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
928cee3aa6bSSatish Balay   return 0;
929cee3aa6bSSatish Balay }
930cee3aa6bSSatish Balay 
931cee3aa6bSSatish Balay /*
932cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
933cee3aa6bSSatish Balay    diagonal block
934cee3aa6bSSatish Balay */
9355615d1e5SSatish Balay #undef __FUNC__
9365615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
937ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
938cee3aa6bSSatish Balay {
939cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
940cee3aa6bSSatish Balay   if (a->M != a->N)
941e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
942cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
943cee3aa6bSSatish Balay }
944cee3aa6bSSatish Balay 
9455615d1e5SSatish Balay #undef __FUNC__
9465615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
947ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
948cee3aa6bSSatish Balay {
949cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
950cee3aa6bSSatish Balay   int        ierr;
951cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
952cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
953cee3aa6bSSatish Balay   return 0;
954cee3aa6bSSatish Balay }
955026e39d0SSatish Balay 
9565615d1e5SSatish Balay #undef __FUNC__
9575615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
958ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
95957b952d6SSatish Balay {
96057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96157b952d6SSatish Balay   *m = mat->M; *n = mat->N;
96257b952d6SSatish Balay   return 0;
96357b952d6SSatish Balay }
96457b952d6SSatish Balay 
9655615d1e5SSatish Balay #undef __FUNC__
9665615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
967ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
96857b952d6SSatish Balay {
96957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
97057b952d6SSatish Balay   *m = mat->m; *n = mat->N;
97157b952d6SSatish Balay   return 0;
97257b952d6SSatish Balay }
97357b952d6SSatish Balay 
9745615d1e5SSatish Balay #undef __FUNC__
9755615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
976ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
97757b952d6SSatish Balay {
97857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
97957b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
98057b952d6SSatish Balay   return 0;
98157b952d6SSatish Balay }
98257b952d6SSatish Balay 
983acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
984acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
985acdf5bf4SSatish Balay 
9865615d1e5SSatish Balay #undef __FUNC__
9875615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
988acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
989acdf5bf4SSatish Balay {
990acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
991acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
992acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
993d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
994d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
995acdf5bf4SSatish Balay 
996e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
997acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
998acdf5bf4SSatish Balay 
999acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1000acdf5bf4SSatish Balay     /*
1001acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1002acdf5bf4SSatish Balay     */
1003acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1004bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1005bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1006acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1007acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1008acdf5bf4SSatish Balay     }
1009acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1010acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1011acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1012acdf5bf4SSatish Balay   }
1013acdf5bf4SSatish Balay 
1014acdf5bf4SSatish Balay 
1015e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1016d9d09a02SSatish Balay   lrow = row - brstart;
1017acdf5bf4SSatish Balay 
1018acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1020acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1022acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1023acdf5bf4SSatish Balay   nztot = nzA + nzB;
1024acdf5bf4SSatish Balay 
1025acdf5bf4SSatish Balay   cmap  = mat->garray;
1026acdf5bf4SSatish Balay   if (v  || idx) {
1027acdf5bf4SSatish Balay     if (nztot) {
1028acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1029acdf5bf4SSatish Balay       int imark = -1;
1030acdf5bf4SSatish Balay       if (v) {
1031acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1032acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1033d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034acdf5bf4SSatish Balay           else break;
1035acdf5bf4SSatish Balay         }
1036acdf5bf4SSatish Balay         imark = i;
1037acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1038acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1039acdf5bf4SSatish Balay       }
1040acdf5bf4SSatish Balay       if (idx) {
1041acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1042acdf5bf4SSatish Balay         if (imark > -1) {
1043acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1044bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045acdf5bf4SSatish Balay           }
1046acdf5bf4SSatish Balay         } else {
1047acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1048d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1049d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050acdf5bf4SSatish Balay             else break;
1051acdf5bf4SSatish Balay           }
1052acdf5bf4SSatish Balay           imark = i;
1053acdf5bf4SSatish Balay         }
1054d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1055d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056acdf5bf4SSatish Balay       }
1057acdf5bf4SSatish Balay     }
1058d212a18eSSatish Balay     else {
1059d212a18eSSatish Balay       if (idx) *idx = 0;
1060d212a18eSSatish Balay       if (v)   *v   = 0;
1061d212a18eSSatish Balay     }
1062acdf5bf4SSatish Balay   }
1063acdf5bf4SSatish Balay   *nz = nztot;
1064acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1065acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1066acdf5bf4SSatish Balay   return 0;
1067acdf5bf4SSatish Balay }
1068acdf5bf4SSatish Balay 
10695615d1e5SSatish Balay #undef __FUNC__
10705615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1071acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1072acdf5bf4SSatish Balay {
1073acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1074acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1075e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1076acdf5bf4SSatish Balay   }
1077acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1078acdf5bf4SSatish Balay   return 0;
1079acdf5bf4SSatish Balay }
1080acdf5bf4SSatish Balay 
10815615d1e5SSatish Balay #undef __FUNC__
10825615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1083ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10845a838052SSatish Balay {
10855a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10865a838052SSatish Balay   *bs = baij->bs;
10875a838052SSatish Balay   return 0;
10885a838052SSatish Balay }
10895a838052SSatish Balay 
10905615d1e5SSatish Balay #undef __FUNC__
10915615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1092ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
109358667388SSatish Balay {
109458667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
109558667388SSatish Balay   int         ierr;
109658667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
109758667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
109858667388SSatish Balay   return 0;
109958667388SSatish Balay }
11000ac07820SSatish Balay 
11015615d1e5SSatish Balay #undef __FUNC__
11025615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1103ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11040ac07820SSatish Balay {
11054e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11064e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11077d57db60SLois Curfman McInnes   int         ierr;
11087d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11090ac07820SSatish Balay 
11104e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11114e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11124e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11134e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11144e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11154e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11164e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11174e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11184e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11190ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11204e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11214e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11224e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11234e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11244e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11250ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1126dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11274e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11284e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11294e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11304e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11314e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11320ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1133dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11344e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11354e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11364e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11374e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11384e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11390ac07820SSatish Balay   }
11404e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11414e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11424e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11430ac07820SSatish Balay   return 0;
11440ac07820SSatish Balay }
11450ac07820SSatish Balay 
11465615d1e5SSatish Balay #undef __FUNC__
11475615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1148ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
114958667388SSatish Balay {
115058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
115158667388SSatish Balay 
115258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
115358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11546da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1155c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
115696854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1157c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1158b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1159b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1160b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1161aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
116258667388SSatish Balay         MatSetOption(a->A,op);
116358667388SSatish Balay         MatSetOption(a->B,op);
1164b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11656da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
116658667388SSatish Balay              op == MAT_SYMMETRIC ||
116758667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
116858667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
116958667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
117058667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
117158667388SSatish Balay     a->roworiented = 0;
117258667388SSatish Balay     MatSetOption(a->A,op);
117358667388SSatish Balay     MatSetOption(a->B,op);
11742b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
117590f02eecSBarry Smith     a->donotstash = 1;
117690f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1177e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
117858667388SSatish Balay   else
1179e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
118058667388SSatish Balay   return 0;
118158667388SSatish Balay }
118258667388SSatish Balay 
11835615d1e5SSatish Balay #undef __FUNC__
11845615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1185ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11860ac07820SSatish Balay {
11870ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11880ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11890ac07820SSatish Balay   Mat        B;
11900ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11910ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11920ac07820SSatish Balay   Scalar     *a;
11930ac07820SSatish Balay 
11940ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1195e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11960ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11970ac07820SSatish Balay   CHKERRQ(ierr);
11980ac07820SSatish Balay 
11990ac07820SSatish Balay   /* copy over the A part */
12000ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12010ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12020ac07820SSatish Balay   row = baij->rstart;
12030ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12040ac07820SSatish Balay 
12050ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12060ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12070ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12080ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12090ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12100ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12110ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12120ac07820SSatish Balay         col++; a += bs;
12130ac07820SSatish Balay       }
12140ac07820SSatish Balay     }
12150ac07820SSatish Balay   }
12160ac07820SSatish Balay   /* copy over the B part */
12170ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12180ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12190ac07820SSatish Balay   row = baij->rstart*bs;
12200ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12210ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12220ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12230ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12240ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12250ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12260ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12270ac07820SSatish Balay         col++; a += bs;
12280ac07820SSatish Balay       }
12290ac07820SSatish Balay     }
12300ac07820SSatish Balay   }
12310ac07820SSatish Balay   PetscFree(rvals);
12320ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12330ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12340ac07820SSatish Balay 
12350ac07820SSatish Balay   if (matout != PETSC_NULL) {
12360ac07820SSatish Balay     *matout = B;
12370ac07820SSatish Balay   } else {
12380ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12390ac07820SSatish Balay     PetscFree(baij->rowners);
12400ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12410ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12420ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12430ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12440ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12450ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12460ac07820SSatish Balay     PetscFree(baij);
1247f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12480ac07820SSatish Balay     PetscHeaderDestroy(B);
12490ac07820SSatish Balay   }
12500ac07820SSatish Balay   return 0;
12510ac07820SSatish Balay }
12520e95ebc0SSatish Balay 
12535615d1e5SSatish Balay #undef __FUNC__
12545615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12550e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12560e95ebc0SSatish Balay {
12570e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12580e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12590e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12600e95ebc0SSatish Balay 
12610e95ebc0SSatish Balay   if (ll)  {
12620e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12630e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1264e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12650e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12660e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12670e95ebc0SSatish Balay   }
1268e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12690e95ebc0SSatish Balay   return 0;
12700e95ebc0SSatish Balay }
12710e95ebc0SSatish Balay 
12720ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12730ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12740ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12750ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12760ac07820SSatish Balay    routine.
12770ac07820SSatish Balay */
12785615d1e5SSatish Balay #undef __FUNC__
12795615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1280ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12810ac07820SSatish Balay {
12820ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12830ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12840ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12850ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12860ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12870ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12880ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12890ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12900ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12910ac07820SSatish Balay   IS             istmp;
12920ac07820SSatish Balay 
12930ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12940ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12950ac07820SSatish Balay 
12960ac07820SSatish Balay   /*  first count number of contributors to each processor */
12970ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12980ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12990ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
13000ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13010ac07820SSatish Balay     idx = rows[i];
13020ac07820SSatish Balay     found = 0;
13030ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13040ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13050ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13060ac07820SSatish Balay       }
13070ac07820SSatish Balay     }
1308e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13090ac07820SSatish Balay   }
13100ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13110ac07820SSatish Balay 
13120ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13130ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13140ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13150ac07820SSatish Balay   nrecvs = work[rank];
13160ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13170ac07820SSatish Balay   nmax = work[rank];
13180ac07820SSatish Balay   PetscFree(work);
13190ac07820SSatish Balay 
13200ac07820SSatish Balay   /* post receives:   */
13210ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13220ac07820SSatish Balay   CHKPTRQ(rvalues);
13230ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13240ac07820SSatish Balay   CHKPTRQ(recv_waits);
13250ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13260ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13270ac07820SSatish Balay   }
13280ac07820SSatish Balay 
13290ac07820SSatish Balay   /* do sends:
13300ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13310ac07820SSatish Balay          the ith processor
13320ac07820SSatish Balay   */
13330ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13340ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13350ac07820SSatish Balay   CHKPTRQ(send_waits);
13360ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13370ac07820SSatish Balay   starts[0] = 0;
13380ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13390ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13400ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13410ac07820SSatish Balay   }
13420ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13430ac07820SSatish Balay 
13440ac07820SSatish Balay   starts[0] = 0;
13450ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13460ac07820SSatish Balay   count = 0;
13470ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13480ac07820SSatish Balay     if (procs[i]) {
13490ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13500ac07820SSatish Balay     }
13510ac07820SSatish Balay   }
13520ac07820SSatish Balay   PetscFree(starts);
13530ac07820SSatish Balay 
13540ac07820SSatish Balay   base = owners[rank]*bs;
13550ac07820SSatish Balay 
13560ac07820SSatish Balay   /*  wait on receives */
13570ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13580ac07820SSatish Balay   source = lens + nrecvs;
13590ac07820SSatish Balay   count  = nrecvs; slen = 0;
13600ac07820SSatish Balay   while (count) {
13610ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13620ac07820SSatish Balay     /* unpack receives into our local space */
13630ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13640ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13650ac07820SSatish Balay     lens[imdex]  = n;
13660ac07820SSatish Balay     slen += n;
13670ac07820SSatish Balay     count--;
13680ac07820SSatish Balay   }
13690ac07820SSatish Balay   PetscFree(recv_waits);
13700ac07820SSatish Balay 
13710ac07820SSatish Balay   /* move the data into the send scatter */
13720ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13730ac07820SSatish Balay   count = 0;
13740ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13750ac07820SSatish Balay     values = rvalues + i*nmax;
13760ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13770ac07820SSatish Balay       lrows[count++] = values[j] - base;
13780ac07820SSatish Balay     }
13790ac07820SSatish Balay   }
13800ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13810ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13820ac07820SSatish Balay 
13830ac07820SSatish Balay   /* actually zap the local rows */
1384029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13850ac07820SSatish Balay   PLogObjectParent(A,istmp);
13860ac07820SSatish Balay   PetscFree(lrows);
13870ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13880ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13890ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13900ac07820SSatish Balay 
13910ac07820SSatish Balay   /* wait on sends */
13920ac07820SSatish Balay   if (nsends) {
13930ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13940ac07820SSatish Balay     CHKPTRQ(send_status);
13950ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13960ac07820SSatish Balay     PetscFree(send_status);
13970ac07820SSatish Balay   }
13980ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13990ac07820SSatish Balay 
14000ac07820SSatish Balay   return 0;
14010ac07820SSatish Balay }
1402ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14035615d1e5SSatish Balay #undef __FUNC__
14045615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1405ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1406ba4ca20aSSatish Balay {
1407ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1408ba4ca20aSSatish Balay 
1409ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1410ba4ca20aSSatish Balay   else return 0;
1411ba4ca20aSSatish Balay }
14120ac07820SSatish Balay 
14135615d1e5SSatish Balay #undef __FUNC__
14145615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1415ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1416bb5a7306SBarry Smith {
1417bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1418bb5a7306SBarry Smith   int         ierr;
1419bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1420bb5a7306SBarry Smith   return 0;
1421bb5a7306SBarry Smith }
1422bb5a7306SBarry Smith 
14230ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14240ac07820SSatish Balay 
142579bdfe76SSatish Balay /* -------------------------------------------------------------------*/
142679bdfe76SSatish Balay static struct _MatOps MatOps = {
1427bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14284c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14294c50302cSBarry Smith   0,0,0,0,
14300ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14310e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
143258667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14334c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14344c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14354c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
143694a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1437d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1438ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1439bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1440ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
144179bdfe76SSatish Balay 
144279bdfe76SSatish Balay 
14435615d1e5SSatish Balay #undef __FUNC__
14445615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
144579bdfe76SSatish Balay /*@C
144679bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
144779bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
144879bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
144979bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
145079bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
145179bdfe76SSatish Balay 
145279bdfe76SSatish Balay    Input Parameters:
145379bdfe76SSatish Balay .  comm - MPI communicator
145479bdfe76SSatish Balay .  bs   - size of blockk
145579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
145692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
145892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
145992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
146092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
146179bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
146292e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
146379bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
146479bdfe76SSatish Balay            submatrix  (same for all local rows)
146592e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
146692e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
146792e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
146892e8d321SLois Curfman McInnes            it is zero.
146992e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
147079bdfe76SSatish Balay            submatrix (same for all local rows).
147192e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
147292e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
147392e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
147479bdfe76SSatish Balay 
147579bdfe76SSatish Balay    Output Parameter:
147679bdfe76SSatish Balay .  A - the matrix
147779bdfe76SSatish Balay 
147879bdfe76SSatish Balay    Notes:
147979bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
148079bdfe76SSatish Balay    (possibly both).
148179bdfe76SSatish Balay 
148279bdfe76SSatish Balay    Storage Information:
148379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
148479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
148579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
148679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
148779bdfe76SSatish Balay 
148879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
148979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
149079bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
149179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
149279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
149379bdfe76SSatish Balay 
149479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
149579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
149679bdfe76SSatish Balay 
149779bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
149879bdfe76SSatish Balay $         -------------------
149979bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
150079bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
150179bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
150279bdfe76SSatish Balay $         -------------------
150379bdfe76SSatish Balay $
150479bdfe76SSatish Balay 
150579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
150679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
150779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
150857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
150979bdfe76SSatish Balay 
151079bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
151179bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
151279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
151392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
151492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15156da5968aSLois Curfman McInnes    matrices.
151679bdfe76SSatish Balay 
151792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
151879bdfe76SSatish Balay 
151979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
152079bdfe76SSatish Balay @*/
152179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
152279bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
152379bdfe76SSatish Balay {
152479bdfe76SSatish Balay   Mat          B;
152579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15264c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
152779bdfe76SSatish Balay 
1528e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
152979bdfe76SSatish Balay   *A = 0;
1530f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
153179bdfe76SSatish Balay   PLogObjectCreate(B);
153279bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
153379bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
153479bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15354c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15364c50302cSBarry Smith   if (size == 1) {
15374c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15384c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15394c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15404c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15414c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15424c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15434c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15444c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15454c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15464c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15474c50302cSBarry Smith   }
15484c50302cSBarry Smith 
154979bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
155079bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
155190f02eecSBarry Smith   B->mapping    = 0;
155279bdfe76SSatish Balay   B->factor     = 0;
155379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
155479bdfe76SSatish Balay 
1555e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
155679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
155779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
155879bdfe76SSatish Balay 
155979bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1560e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1561e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1562e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1563cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1564cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
156579bdfe76SSatish Balay 
156679bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
156779bdfe76SSatish Balay     work[0] = m; work[1] = n;
156879bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
156979bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
157079bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
157179bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
157279bdfe76SSatish Balay   }
157379bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
157479bdfe76SSatish Balay     Mbs = M/bs;
1575e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
157679bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
157779bdfe76SSatish Balay     m   = mbs*bs;
157879bdfe76SSatish Balay   }
157979bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
158079bdfe76SSatish Balay     Nbs = N/bs;
1581e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
158279bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
158379bdfe76SSatish Balay     n   = nbs*bs;
158479bdfe76SSatish Balay   }
1585e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
158679bdfe76SSatish Balay 
158779bdfe76SSatish Balay   b->m = m; B->m = m;
158879bdfe76SSatish Balay   b->n = n; B->n = n;
158979bdfe76SSatish Balay   b->N = N; B->N = N;
159079bdfe76SSatish Balay   b->M = M; B->M = M;
159179bdfe76SSatish Balay   b->bs  = bs;
159279bdfe76SSatish Balay   b->bs2 = bs*bs;
159379bdfe76SSatish Balay   b->mbs = mbs;
159479bdfe76SSatish Balay   b->nbs = nbs;
159579bdfe76SSatish Balay   b->Mbs = Mbs;
159679bdfe76SSatish Balay   b->Nbs = Nbs;
159779bdfe76SSatish Balay 
159879bdfe76SSatish Balay   /* build local table of row and column ownerships */
159979bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1600f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16010ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
160279bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
160379bdfe76SSatish Balay   b->rowners[0] = 0;
160479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
160579bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
160679bdfe76SSatish Balay   }
160779bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
160879bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16094fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16104fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16114fa0d573SSatish Balay 
161279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
161379bdfe76SSatish Balay   b->cowners[0] = 0;
161479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
161579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
161679bdfe76SSatish Balay   }
161779bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
161879bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16194fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16204fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
162179bdfe76SSatish Balay 
162279bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1623029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
162479bdfe76SSatish Balay   PLogObjectParent(B,b->A);
162579bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1626029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
162779bdfe76SSatish Balay   PLogObjectParent(B,b->B);
162879bdfe76SSatish Balay 
162979bdfe76SSatish Balay   /* build cache for off array entries formed */
163079bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
163190f02eecSBarry Smith   b->donotstash  = 0;
163279bdfe76SSatish Balay   b->colmap      = 0;
163379bdfe76SSatish Balay   b->garray      = 0;
163479bdfe76SSatish Balay   b->roworiented = 1;
163579bdfe76SSatish Balay 
163630793edcSSatish Balay   /* stuff used in block assembly */
163730793edcSSatish Balay   b->barray      = 0;
163830793edcSSatish Balay 
163979bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
164079bdfe76SSatish Balay   b->lvec        = 0;
164179bdfe76SSatish Balay   b->Mvctx       = 0;
164279bdfe76SSatish Balay 
164379bdfe76SSatish Balay   /* stuff for MatGetRow() */
164479bdfe76SSatish Balay   b->rowindices   = 0;
164579bdfe76SSatish Balay   b->rowvalues    = 0;
164679bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
164779bdfe76SSatish Balay 
164879bdfe76SSatish Balay   *A = B;
164979bdfe76SSatish Balay   return 0;
165079bdfe76SSatish Balay }
1651026e39d0SSatish Balay 
16525615d1e5SSatish Balay #undef __FUNC__
16535615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16540ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16550ac07820SSatish Balay {
16560ac07820SSatish Balay   Mat         mat;
16570ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16580ac07820SSatish Balay   int         ierr, len=0, flg;
16590ac07820SSatish Balay 
16600ac07820SSatish Balay   *newmat       = 0;
1661f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16620ac07820SSatish Balay   PLogObjectCreate(mat);
16630ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16640ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16650ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16660ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16670ac07820SSatish Balay   mat->factor     = matin->factor;
16680ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16690ac07820SSatish Balay 
16700ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16710ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16720ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16730ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16740ac07820SSatish Balay 
16750ac07820SSatish Balay   a->bs  = oldmat->bs;
16760ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16770ac07820SSatish Balay   a->mbs = oldmat->mbs;
16780ac07820SSatish Balay   a->nbs = oldmat->nbs;
16790ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16800ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16810ac07820SSatish Balay 
16820ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16830ac07820SSatish Balay   a->rend         = oldmat->rend;
16840ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16850ac07820SSatish Balay   a->cend         = oldmat->cend;
16860ac07820SSatish Balay   a->size         = oldmat->size;
16870ac07820SSatish Balay   a->rank         = oldmat->rank;
1688e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16890ac07820SSatish Balay   a->rowvalues    = 0;
16900ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
169130793edcSSatish Balay   a->barray       = 0;
16920ac07820SSatish Balay 
16930ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1694f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16950ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16960ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16970ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16980ac07820SSatish Balay   if (oldmat->colmap) {
16990ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
17000ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17010ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17020ac07820SSatish Balay   } else a->colmap = 0;
17030ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17040ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17050ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17060ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17070ac07820SSatish Balay   } else a->garray = 0;
17080ac07820SSatish Balay 
17090ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17100ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17110ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17120ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17130ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17140ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17150ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17160ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17170ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17180ac07820SSatish Balay   if (flg) {
17190ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17200ac07820SSatish Balay   }
17210ac07820SSatish Balay   *newmat = mat;
17220ac07820SSatish Balay   return 0;
17230ac07820SSatish Balay }
172457b952d6SSatish Balay 
172557b952d6SSatish Balay #include "sys.h"
172657b952d6SSatish Balay 
17275615d1e5SSatish Balay #undef __FUNC__
17285615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
172957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
173057b952d6SSatish Balay {
173157b952d6SSatish Balay   Mat          A;
173257b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
173357b952d6SSatish Balay   Scalar       *vals,*buf;
173457b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
173557b952d6SSatish Balay   MPI_Status   status;
1736cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
173757b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
173857b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
173957b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
174057b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
174157b952d6SSatish Balay 
174257b952d6SSatish Balay 
174357b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
174457b952d6SSatish Balay   bs2  = bs*bs;
174557b952d6SSatish Balay 
174657b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
174757b952d6SSatish Balay   if (!rank) {
174857b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
174957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1750e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
175157b952d6SSatish Balay   }
175257b952d6SSatish Balay 
175357b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
175457b952d6SSatish Balay   M = header[1]; N = header[2];
175557b952d6SSatish Balay 
1756e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
175757b952d6SSatish Balay 
175857b952d6SSatish Balay   /*
175957b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
176057b952d6SSatish Balay      divisible by the blocksize
176157b952d6SSatish Balay   */
176257b952d6SSatish Balay   Mbs        = M/bs;
176357b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
176457b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
176557b952d6SSatish Balay   else                  Mbs++;
176657b952d6SSatish Balay   if (extra_rows &&!rank) {
1767b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
176857b952d6SSatish Balay   }
1769537820f0SBarry Smith 
177057b952d6SSatish Balay   /* determine ownership of all rows */
177157b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
177257b952d6SSatish Balay   m   = mbs * bs;
1773cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1774cee3aa6bSSatish Balay   browners = rowners + size + 1;
177557b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
177657b952d6SSatish Balay   rowners[0] = 0;
1777cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1778cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
177957b952d6SSatish Balay   rstart = rowners[rank];
178057b952d6SSatish Balay   rend   = rowners[rank+1];
178157b952d6SSatish Balay 
178257b952d6SSatish Balay   /* distribute row lengths to all processors */
178357b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
178457b952d6SSatish Balay   if (!rank) {
178557b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
178657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
178757b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
178857b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1789cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1790cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
179157b952d6SSatish Balay     PetscFree(sndcounts);
179257b952d6SSatish Balay   }
179357b952d6SSatish Balay   else {
179457b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
179557b952d6SSatish Balay   }
179657b952d6SSatish Balay 
179757b952d6SSatish Balay   if (!rank) {
179857b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
179957b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
180057b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
180157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
180257b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
180357b952d6SSatish Balay         procsnz[i] += rowlengths[j];
180457b952d6SSatish Balay       }
180557b952d6SSatish Balay     }
180657b952d6SSatish Balay     PetscFree(rowlengths);
180757b952d6SSatish Balay 
180857b952d6SSatish Balay     /* determine max buffer needed and allocate it */
180957b952d6SSatish Balay     maxnz = 0;
181057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
181157b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
181257b952d6SSatish Balay     }
181357b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
181457b952d6SSatish Balay 
181557b952d6SSatish Balay     /* read in my part of the matrix column indices  */
181657b952d6SSatish Balay     nz = procsnz[0];
181757b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
181857b952d6SSatish Balay     mycols = ibuf;
1819cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
182057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1821cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1822cee3aa6bSSatish Balay 
182357b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
182457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
182557b952d6SSatish Balay       nz = procsnz[i];
182657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
182757b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
182857b952d6SSatish Balay     }
182957b952d6SSatish Balay     /* read in the stuff for the last proc */
183057b952d6SSatish Balay     if ( size != 1 ) {
183157b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
183257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
183357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1834cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
183557b952d6SSatish Balay     }
183657b952d6SSatish Balay     PetscFree(cols);
183757b952d6SSatish Balay   }
183857b952d6SSatish Balay   else {
183957b952d6SSatish Balay     /* determine buffer space needed for message */
184057b952d6SSatish Balay     nz = 0;
184157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
184257b952d6SSatish Balay       nz += locrowlens[i];
184357b952d6SSatish Balay     }
184457b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
184557b952d6SSatish Balay     mycols = ibuf;
184657b952d6SSatish Balay     /* receive message of column indices*/
184757b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
184857b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1849e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
185057b952d6SSatish Balay   }
185157b952d6SSatish Balay 
185257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1853cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1854cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
185557b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1856cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
185757b952d6SSatish Balay   masked1 = mask    + Mbs;
185857b952d6SSatish Balay   masked2 = masked1 + Mbs;
185957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
186057b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
186157b952d6SSatish Balay     dcount  = 0;
186257b952d6SSatish Balay     odcount = 0;
186357b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
186457b952d6SSatish Balay       kmax = locrowlens[rowcount];
186557b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
186657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
186757b952d6SSatish Balay         if (!mask[tmp]) {
186857b952d6SSatish Balay           mask[tmp] = 1;
186957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
187057b952d6SSatish Balay           else masked1[dcount++] = tmp;
187157b952d6SSatish Balay         }
187257b952d6SSatish Balay       }
187357b952d6SSatish Balay       rowcount++;
187457b952d6SSatish Balay     }
1875cee3aa6bSSatish Balay 
187657b952d6SSatish Balay     dlens[i]  = dcount;
187757b952d6SSatish Balay     odlens[i] = odcount;
1878cee3aa6bSSatish Balay 
187957b952d6SSatish Balay     /* zero out the mask elements we set */
188057b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
188157b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
188257b952d6SSatish Balay   }
1883cee3aa6bSSatish Balay 
188457b952d6SSatish Balay   /* create our matrix */
1885537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1886537820f0SBarry Smith          CHKERRQ(ierr);
188757b952d6SSatish Balay   A = *newmat;
18886d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
188957b952d6SSatish Balay 
189057b952d6SSatish Balay   if (!rank) {
189157b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
189257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
189357b952d6SSatish Balay     nz = procsnz[0];
189457b952d6SSatish Balay     vals = buf;
1895cee3aa6bSSatish Balay     mycols = ibuf;
1896cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
189757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1898cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1899537820f0SBarry Smith 
190057b952d6SSatish Balay     /* insert into matrix */
190157b952d6SSatish Balay     jj      = rstart*bs;
190257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
190357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
190457b952d6SSatish Balay       mycols += locrowlens[i];
190557b952d6SSatish Balay       vals   += locrowlens[i];
190657b952d6SSatish Balay       jj++;
190757b952d6SSatish Balay     }
190857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
190957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
191057b952d6SSatish Balay       nz = procsnz[i];
191157b952d6SSatish Balay       vals = buf;
191257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
191357b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
191457b952d6SSatish Balay     }
191557b952d6SSatish Balay     /* the last proc */
191657b952d6SSatish Balay     if ( size != 1 ){
191757b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1918cee3aa6bSSatish Balay       vals = buf;
191957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
192057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1921cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
192257b952d6SSatish Balay     }
192357b952d6SSatish Balay     PetscFree(procsnz);
192457b952d6SSatish Balay   }
192557b952d6SSatish Balay   else {
192657b952d6SSatish Balay     /* receive numeric values */
192757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
192857b952d6SSatish Balay 
192957b952d6SSatish Balay     /* receive message of values*/
193057b952d6SSatish Balay     vals = buf;
1931cee3aa6bSSatish Balay     mycols = ibuf;
193257b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
193357b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1934e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
193557b952d6SSatish Balay 
193657b952d6SSatish Balay     /* insert into matrix */
193757b952d6SSatish Balay     jj      = rstart*bs;
1938cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
193957b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
194057b952d6SSatish Balay       mycols += locrowlens[i];
194157b952d6SSatish Balay       vals   += locrowlens[i];
194257b952d6SSatish Balay       jj++;
194357b952d6SSatish Balay     }
194457b952d6SSatish Balay   }
194557b952d6SSatish Balay   PetscFree(locrowlens);
194657b952d6SSatish Balay   PetscFree(buf);
194757b952d6SSatish Balay   PetscFree(ibuf);
194857b952d6SSatish Balay   PetscFree(rowners);
194957b952d6SSatish Balay   PetscFree(dlens);
1950cee3aa6bSSatish Balay   PetscFree(mask);
19516d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19526d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
195357b952d6SSatish Balay   return 0;
195457b952d6SSatish Balay }
195557b952d6SSatish Balay 
195657b952d6SSatish Balay 
1957