xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 11ebbc714b45ef4372941d536196407bce829dd6)
179bdfe76SSatish Balay #ifndef lint
2*11ebbc71SLois Curfman McInnes static char vcid[] = "$Id: mpibaij.c,v 1.68 1997/05/03 22:43:28 curfman Exp curfman $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
9589280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
96*11ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
102*11ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
10389280ab3SLois Curfman McInnes  \
10480c1aa95SSatish Balay         /* malloc new storage space */ \
10580c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10680c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10780c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10880c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10980c1aa95SSatish Balay  \
11080c1aa95SSatish Balay         /* copy over old data into new slots */ \
11180c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11280c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11380c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11480c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11580c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11680c1aa95SSatish Balay                                                            len*sizeof(int)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11880c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11980c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
12080c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
12180c1aa95SSatish Balay         /* free up old matrix storage */ \
12280c1aa95SSatish Balay         PetscFree(a->a);  \
12380c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12580c1aa95SSatish Balay         a->singlemalloc = 1; \
12680c1aa95SSatish Balay  \
12780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12980c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
13180c1aa95SSatish Balay         a->reallocs++; \
13280c1aa95SSatish Balay         a->nz++; \
13380c1aa95SSatish Balay       } \
13480c1aa95SSatish Balay       N = nrow++ - 1;  \
13580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13680c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13880c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13980c1aa95SSatish Balay       } \
14080c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
14180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
143ac7a638eSSatish Balay       a_noinsert:; \
14480c1aa95SSatish Balay     ailen[brow] = nrow; \
14580c1aa95SSatish Balay }
14657b952d6SSatish Balay 
147ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148ac7a638eSSatish Balay { \
149ac7a638eSSatish Balay  \
150ac7a638eSSatish Balay     brow = row/bs;  \
151ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
153ac7a638eSSatish Balay       bcol = col/bs; \
154ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
155ac7a638eSSatish Balay       low = 0; high = nrow; \
156ac7a638eSSatish Balay       while (high-low > 3) { \
157ac7a638eSSatish Balay         t = (low+high)/2; \
158ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
159ac7a638eSSatish Balay         else              low  = t; \
160ac7a638eSSatish Balay       } \
161ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
162ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
163ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
164ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
166ac7a638eSSatish Balay           else                    *bap  = value;  \
167ac7a638eSSatish Balay           goto b_noinsert; \
168ac7a638eSSatish Balay         } \
169ac7a638eSSatish Balay       } \
17089280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
171*11ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
172ac7a638eSSatish Balay       if (nrow >= rmax) { \
173ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17474c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
175ac7a638eSSatish Balay         Scalar *new_a; \
176ac7a638eSSatish Balay  \
177*11ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
17889280ab3SLois Curfman McInnes  \
179ac7a638eSSatish Balay         /* malloc new storage space */ \
18074c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
181ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
182ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
183ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
184ac7a638eSSatish Balay  \
185ac7a638eSSatish Balay         /* copy over old data into new slots */ \
186ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18774c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
188ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
189ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
190ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
191ac7a638eSSatish Balay                                                            len*sizeof(int)); \
192ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
193ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
194ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
195ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
196ac7a638eSSatish Balay         /* free up old matrix storage */ \
19774c639caSSatish Balay         PetscFree(b->a);  \
19874c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
20074c639caSSatish Balay         b->singlemalloc = 1; \
201ac7a638eSSatish Balay  \
202ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
20474c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
20574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20674c639caSSatish Balay         b->reallocs++; \
20774c639caSSatish Balay         b->nz++; \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       N = nrow++ - 1;  \
210ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
211ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
212ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
213ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
214ac7a638eSSatish Balay       } \
215ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
216ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
217ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
218ac7a638eSSatish Balay       b_noinsert:; \
219ac7a638eSSatish Balay     bilen[brow] = nrow; \
220ac7a638eSSatish Balay }
221ac7a638eSSatish Balay 
2225615d1e5SSatish Balay #undef __FUNC__
2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
22557b952d6SSatish Balay {
22657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22757b952d6SSatish Balay   Scalar      value;
2284fa0d573SSatish Balay   int         ierr,i,j,row,col;
2294fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2304fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2314fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
23257b952d6SSatish Balay 
233eada6651SSatish Balay   /* Some Variables required in the macro */
23480c1aa95SSatish Balay   Mat         A = baij->A;
23580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
236ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
237ac7a638eSSatish Balay   Scalar      *aa=a->a;
238ac7a638eSSatish Balay 
239ac7a638eSSatish Balay   Mat         B = baij->B;
240ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
241ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
242ac7a638eSSatish Balay   Scalar      *ba=b->a;
243ac7a638eSSatish Balay 
244ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
245ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
246ac7a638eSSatish Balay   Scalar      *ap,*bap;
24780c1aa95SSatish Balay 
24857b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
250e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
251e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
252639f9d9dSBarry Smith #endif
25357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
25457b952d6SSatish Balay       row = im[i] - rstart_orig;
25557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25757b952d6SSatish Balay           col = in[j] - cstart_orig;
25857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
26080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26157b952d6SSatish Balay         }
262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
263e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay         else {
26757b952d6SSatish Balay           if (mat->was_assembled) {
268905e6a2fSBarry Smith             if (!baij->colmap) {
269905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270905e6a2fSBarry Smith             }
271905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
27257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
27357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
27457b952d6SSatish Balay               col =  in[j];
27557b952d6SSatish Balay             }
27657b952d6SSatish Balay           }
27757b952d6SSatish Balay           else col = in[j];
27857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
279ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
280ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
28157b952d6SSatish Balay         }
28257b952d6SSatish Balay       }
28357b952d6SSatish Balay     }
28457b952d6SSatish Balay     else {
28590f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
28657b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
28757b952d6SSatish Balay       }
28857b952d6SSatish Balay       else {
28990f02eecSBarry Smith         if (!baij->donotstash) {
29057b952d6SSatish Balay           row = im[i];
29157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
29257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
29357b952d6SSatish Balay           }
29457b952d6SSatish Balay         }
29557b952d6SSatish Balay       }
29657b952d6SSatish Balay     }
29790f02eecSBarry Smith   }
29857b952d6SSatish Balay   return 0;
29957b952d6SSatish Balay }
30057b952d6SSatish Balay 
301ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
302ab26458aSBarry Smith #undef __FUNC__
303ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
304ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305ab26458aSBarry Smith {
306ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
308abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
309ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
310ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
311ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
312ab26458aSBarry Smith 
31330793edcSSatish Balay 
31430793edcSSatish Balay   if(!barray) {
31530793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
31630793edcSSatish Balay     baij->barray = barray;
31730793edcSSatish Balay   }
31830793edcSSatish Balay 
319ab26458aSBarry Smith   if (roworiented) {
320ab26458aSBarry Smith     stepval = (n-1)*bs;
321ab26458aSBarry Smith   } else {
322ab26458aSBarry Smith     stepval = (m-1)*bs;
323ab26458aSBarry Smith   }
324ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
325ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
326ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
327ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
328ab26458aSBarry Smith #endif
329ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
330ab26458aSBarry Smith       row = im[i] - rstart;
331ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
33215b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
33315b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
33415b57d14SSatish Balay           barray = v + i*bs2;
33515b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
33615b57d14SSatish Balay           barray = v + j*bs2;
33715b57d14SSatish Balay         } else { /* Here a copy is required */
338ab26458aSBarry Smith           if (roworiented) {
339ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
340ab26458aSBarry Smith           } else {
341ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
342abef11f7SSatish Balay           }
343ab26458aSBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval )
344ab26458aSBarry Smith             for (jj=0; jj<bs; jj++ )
34530793edcSSatish Balay               *barray++  = *value++;
34630793edcSSatish Balay           barray -=bs2;
34715b57d14SSatish Balay         }
348abef11f7SSatish Balay 
349abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
350abef11f7SSatish Balay           col = in[j] - cstart;
35130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
352ab26458aSBarry Smith         }
353ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
354ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
355ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
356ab26458aSBarry Smith #endif
357ab26458aSBarry Smith         else {
358ab26458aSBarry Smith           if (mat->was_assembled) {
359ab26458aSBarry Smith             if (!baij->colmap) {
360ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
361ab26458aSBarry Smith             }
362ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
363ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
364ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
365ab26458aSBarry Smith               col =  in[j];
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith           }
368ab26458aSBarry Smith           else col = in[j];
36930793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
370ab26458aSBarry Smith         }
371ab26458aSBarry Smith       }
372ab26458aSBarry Smith     }
373ab26458aSBarry Smith     else {
374ab26458aSBarry Smith       if (!baij->donotstash) {
375ab26458aSBarry Smith         if (roworiented ) {
376abef11f7SSatish Balay           row   = im[i]*bs;
377abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
378abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
379abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
380abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
381abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
382abef11f7SSatish Balay               }
383ab26458aSBarry Smith             }
384ab26458aSBarry Smith           }
385ab26458aSBarry Smith         }
386ab26458aSBarry Smith         else {
387ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
388abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
389abef11f7SSatish Balay             col   = in[j]*bs;
390abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
391abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
392abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
393ab26458aSBarry Smith               }
394ab26458aSBarry Smith             }
395ab26458aSBarry Smith           }
396abef11f7SSatish Balay 
397abef11f7SSatish Balay         }
398abef11f7SSatish Balay       }
399ab26458aSBarry Smith     }
400ab26458aSBarry Smith   }
401ab26458aSBarry Smith   return 0;
402ab26458aSBarry Smith }
403ab26458aSBarry Smith 
4045615d1e5SSatish Balay #undef __FUNC__
4055615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
406ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
407d6de1c52SSatish Balay {
408d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
409d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
410d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
411d6de1c52SSatish Balay 
412d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
413e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
414e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
415d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
416d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
417d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
418e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
419e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
420d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
421d6de1c52SSatish Balay           col = idxn[j] - bscstart;
422d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
423d6de1c52SSatish Balay         }
424d6de1c52SSatish Balay         else {
425905e6a2fSBarry Smith           if (!baij->colmap) {
426905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427905e6a2fSBarry Smith           }
428e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
429dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
430d9d09a02SSatish Balay           else {
431dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
432d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
433d6de1c52SSatish Balay           }
434d6de1c52SSatish Balay         }
435d6de1c52SSatish Balay       }
436d9d09a02SSatish Balay     }
437d6de1c52SSatish Balay     else {
438e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
439d6de1c52SSatish Balay     }
440d6de1c52SSatish Balay   }
441d6de1c52SSatish Balay   return 0;
442d6de1c52SSatish Balay }
443d6de1c52SSatish Balay 
4445615d1e5SSatish Balay #undef __FUNC__
4455615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
446ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
447d6de1c52SSatish Balay {
448d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
449d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
450acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
451d6de1c52SSatish Balay   double     sum = 0.0;
452d6de1c52SSatish Balay   Scalar     *v;
453d6de1c52SSatish Balay 
454d6de1c52SSatish Balay   if (baij->size == 1) {
455d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
456d6de1c52SSatish Balay   } else {
457d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
458d6de1c52SSatish Balay       v = amat->a;
459d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
460d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
461d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
462d6de1c52SSatish Balay #else
463d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
464d6de1c52SSatish Balay #endif
465d6de1c52SSatish Balay       }
466d6de1c52SSatish Balay       v = bmat->a;
467d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
468d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
469d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
470d6de1c52SSatish Balay #else
471d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
472d6de1c52SSatish Balay #endif
473d6de1c52SSatish Balay       }
474d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
475d6de1c52SSatish Balay       *norm = sqrt(*norm);
476d6de1c52SSatish Balay     }
477acdf5bf4SSatish Balay     else
478e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
479d6de1c52SSatish Balay   }
480d6de1c52SSatish Balay   return 0;
481d6de1c52SSatish Balay }
48257b952d6SSatish Balay 
4835615d1e5SSatish Balay #undef __FUNC__
4845615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
485ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
48657b952d6SSatish Balay {
48757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
48857b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
48957b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
49057b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
49157b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
49257b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
49357b952d6SSatish Balay   InsertMode  addv;
49457b952d6SSatish Balay   Scalar      *rvalues,*svalues;
49557b952d6SSatish Balay 
49657b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
497e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
49857b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
499e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
50057b952d6SSatish Balay   }
501e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
50257b952d6SSatish Balay 
50357b952d6SSatish Balay   /*  first count number of contributors to each processor */
50457b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
50557b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
50657b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
50757b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
50857b952d6SSatish Balay     idx = baij->stash.idx[i];
50957b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
51057b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
51157b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
51257b952d6SSatish Balay       }
51357b952d6SSatish Balay     }
51457b952d6SSatish Balay   }
51557b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
51657b952d6SSatish Balay 
51757b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
51857b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
51957b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
52057b952d6SSatish Balay   nreceives = work[rank];
52157b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
52257b952d6SSatish Balay   nmax = work[rank];
52357b952d6SSatish Balay   PetscFree(work);
52457b952d6SSatish Balay 
52557b952d6SSatish Balay   /* post receives:
52657b952d6SSatish Balay        1) each message will consist of ordered pairs
52757b952d6SSatish Balay      (global index,value) we store the global index as a double
52857b952d6SSatish Balay      to simplify the message passing.
52957b952d6SSatish Balay        2) since we don't know how long each individual message is we
53057b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
53157b952d6SSatish Balay      this is a lot of wasted space.
53257b952d6SSatish Balay 
53357b952d6SSatish Balay 
53457b952d6SSatish Balay        This could be done better.
53557b952d6SSatish Balay   */
53657b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
53757b952d6SSatish Balay   CHKPTRQ(rvalues);
53857b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
53957b952d6SSatish Balay   CHKPTRQ(recv_waits);
54057b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
54157b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
54257b952d6SSatish Balay               comm,recv_waits+i);
54357b952d6SSatish Balay   }
54457b952d6SSatish Balay 
54557b952d6SSatish Balay   /* do sends:
54657b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
54757b952d6SSatish Balay          the ith processor
54857b952d6SSatish Balay   */
54957b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
55057b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
55157b952d6SSatish Balay   CHKPTRQ(send_waits);
55257b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
55357b952d6SSatish Balay   starts[0] = 0;
55457b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
55557b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
55657b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
55757b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
55857b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
55957b952d6SSatish Balay   }
56057b952d6SSatish Balay   PetscFree(owner);
56157b952d6SSatish Balay   starts[0] = 0;
56257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56357b952d6SSatish Balay   count = 0;
56457b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
56557b952d6SSatish Balay     if (procs[i]) {
56657b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
56757b952d6SSatish Balay                 comm,send_waits+count++);
56857b952d6SSatish Balay     }
56957b952d6SSatish Balay   }
57057b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
57157b952d6SSatish Balay 
57257b952d6SSatish Balay   /* Free cache space */
573d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
57457b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
57557b952d6SSatish Balay 
57657b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
57757b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
57857b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
57957b952d6SSatish Balay   baij->rmax       = nmax;
58057b952d6SSatish Balay 
58157b952d6SSatish Balay   return 0;
58257b952d6SSatish Balay }
58357b952d6SSatish Balay 
58457b952d6SSatish Balay 
5855615d1e5SSatish Balay #undef __FUNC__
5865615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
587ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
58857b952d6SSatish Balay {
58957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
59057b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
59157b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
59257b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
59357b952d6SSatish Balay   Scalar      *values,val;
594e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
59557b952d6SSatish Balay 
59657b952d6SSatish Balay   /*  wait on receives */
59757b952d6SSatish Balay   while (count) {
59857b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
59957b952d6SSatish Balay     /* unpack receives into our local space */
60057b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
60157b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
60257b952d6SSatish Balay     n = n/3;
60357b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
60457b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
60557b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
60657b952d6SSatish Balay       val = values[3*i+2];
60757b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
60857b952d6SSatish Balay         col -= baij->cstart*bs;
60957b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
61057b952d6SSatish Balay       }
61157b952d6SSatish Balay       else {
61257b952d6SSatish Balay         if (mat->was_assembled) {
613905e6a2fSBarry Smith           if (!baij->colmap) {
614905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
615905e6a2fSBarry Smith           }
616905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
61757b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
61857b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
61957b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
62057b952d6SSatish Balay           }
62157b952d6SSatish Balay         }
62257b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
62357b952d6SSatish Balay       }
62457b952d6SSatish Balay     }
62557b952d6SSatish Balay     count--;
62657b952d6SSatish Balay   }
62757b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
62857b952d6SSatish Balay 
62957b952d6SSatish Balay   /* wait on sends */
63057b952d6SSatish Balay   if (baij->nsends) {
63157b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
63257b952d6SSatish Balay     CHKPTRQ(send_status);
63357b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
63457b952d6SSatish Balay     PetscFree(send_status);
63557b952d6SSatish Balay   }
63657b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
63757b952d6SSatish Balay 
63857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
63957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
64057b952d6SSatish Balay 
64157b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
64257b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
64357b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
64457b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
64557b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
64657b952d6SSatish Balay   }
64757b952d6SSatish Balay 
6486d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
64957b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
65057b952d6SSatish Balay   }
65157b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
65257b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
65357b952d6SSatish Balay 
65457b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
65557b952d6SSatish Balay   return 0;
65657b952d6SSatish Balay }
65757b952d6SSatish Balay 
6585615d1e5SSatish Balay #undef __FUNC__
6595615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
66057b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
66157b952d6SSatish Balay {
66257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
66357b952d6SSatish Balay   int          ierr;
66457b952d6SSatish Balay 
66557b952d6SSatish Balay   if (baij->size == 1) {
66657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
66757b952d6SSatish Balay   }
668e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
66957b952d6SSatish Balay   return 0;
67057b952d6SSatish Balay }
67157b952d6SSatish Balay 
6725615d1e5SSatish Balay #undef __FUNC__
6735615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
67457b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
67557b952d6SSatish Balay {
67657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
677cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
67857b952d6SSatish Balay   FILE         *fd;
67957b952d6SSatish Balay   ViewerType   vtype;
68057b952d6SSatish Balay 
68157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
68257b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
68357b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
684639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6854e220ebcSLois Curfman McInnes       MatInfo info;
68657b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
68757b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6884e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
68957b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
69057b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6914e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6924e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6934e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6944e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6954e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6964e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
69757b952d6SSatish Balay       fflush(fd);
69857b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
69957b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
70057b952d6SSatish Balay       return 0;
70157b952d6SSatish Balay     }
702639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
703bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
70457b952d6SSatish Balay       return 0;
70557b952d6SSatish Balay     }
70657b952d6SSatish Balay   }
70757b952d6SSatish Balay 
70857b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
70957b952d6SSatish Balay     Draw       draw;
71057b952d6SSatish Balay     PetscTruth isnull;
71157b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
71257b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
71357b952d6SSatish Balay   }
71457b952d6SSatish Balay 
71557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
71657b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
71757b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
71857b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
71957b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
72057b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
72157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
72257b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
72357b952d6SSatish Balay     fflush(fd);
72457b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
72557b952d6SSatish Balay   }
72657b952d6SSatish Balay   else {
72757b952d6SSatish Balay     int size = baij->size;
72857b952d6SSatish Balay     rank = baij->rank;
72957b952d6SSatish Balay     if (size == 1) {
73057b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
73157b952d6SSatish Balay     }
73257b952d6SSatish Balay     else {
73357b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
73457b952d6SSatish Balay       Mat         A;
73557b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
73657b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
73757b952d6SSatish Balay       int         mbs=baij->mbs;
73857b952d6SSatish Balay       Scalar      *a;
73957b952d6SSatish Balay 
74057b952d6SSatish Balay       if (!rank) {
741cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
74257b952d6SSatish Balay         CHKERRQ(ierr);
74357b952d6SSatish Balay       }
74457b952d6SSatish Balay       else {
745cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
74657b952d6SSatish Balay         CHKERRQ(ierr);
74757b952d6SSatish Balay       }
74857b952d6SSatish Balay       PLogObjectParent(mat,A);
74957b952d6SSatish Balay 
75057b952d6SSatish Balay       /* copy over the A part */
75157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
75257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
75357b952d6SSatish Balay       row = baij->rstart;
75457b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
75557b952d6SSatish Balay 
75657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
75757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
75857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
75957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76057b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
76157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
762cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
763cee3aa6bSSatish Balay             col++; a += bs;
76457b952d6SSatish Balay           }
76557b952d6SSatish Balay         }
76657b952d6SSatish Balay       }
76757b952d6SSatish Balay       /* copy over the B part */
76857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
76957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77057b952d6SSatish Balay       row = baij->rstart*bs;
77157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
77257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
77357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
77457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
77557b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
77657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
777cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
778cee3aa6bSSatish Balay             col++; a += bs;
77957b952d6SSatish Balay           }
78057b952d6SSatish Balay         }
78157b952d6SSatish Balay       }
78257b952d6SSatish Balay       PetscFree(rvals);
7836d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7846d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
78557b952d6SSatish Balay       if (!rank) {
78657b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
78757b952d6SSatish Balay       }
78857b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
78957b952d6SSatish Balay     }
79057b952d6SSatish Balay   }
79157b952d6SSatish Balay   return 0;
79257b952d6SSatish Balay }
79357b952d6SSatish Balay 
79457b952d6SSatish Balay 
79557b952d6SSatish Balay 
7965615d1e5SSatish Balay #undef __FUNC__
7975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
798ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
79957b952d6SSatish Balay {
80057b952d6SSatish Balay   Mat         mat = (Mat) obj;
80157b952d6SSatish Balay   int         ierr;
80257b952d6SSatish Balay   ViewerType  vtype;
80357b952d6SSatish Balay 
80457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
80557b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
80657b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
80757b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
80857b952d6SSatish Balay   }
80957b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
81057b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
81157b952d6SSatish Balay   }
81257b952d6SSatish Balay   return 0;
81357b952d6SSatish Balay }
81457b952d6SSatish Balay 
8155615d1e5SSatish Balay #undef __FUNC__
8165615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
817ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
81879bdfe76SSatish Balay {
81979bdfe76SSatish Balay   Mat         mat = (Mat) obj;
82079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
82179bdfe76SSatish Balay   int         ierr;
82279bdfe76SSatish Balay 
82379bdfe76SSatish Balay #if defined(PETSC_LOG)
82479bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
82579bdfe76SSatish Balay #endif
82679bdfe76SSatish Balay 
82779bdfe76SSatish Balay   PetscFree(baij->rowners);
82879bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
82979bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
83079bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
83179bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
83279bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
83379bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
83479bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
83530793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
83679bdfe76SSatish Balay   PetscFree(baij);
83790f02eecSBarry Smith   if (mat->mapping) {
83890f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
83990f02eecSBarry Smith   }
84079bdfe76SSatish Balay   PLogObjectDestroy(mat);
84179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
84279bdfe76SSatish Balay   return 0;
84379bdfe76SSatish Balay }
84479bdfe76SSatish Balay 
8455615d1e5SSatish Balay #undef __FUNC__
8465615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
847ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
848cee3aa6bSSatish Balay {
849cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
85047b4a8eaSLois Curfman McInnes   int         ierr, nt;
851cee3aa6bSSatish Balay 
852c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
85347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
854ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
85547b4a8eaSLois Curfman McInnes   }
856c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
85747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
858e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
85947b4a8eaSLois Curfman McInnes   }
86043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
861cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
86243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
863cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
86443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
865cee3aa6bSSatish Balay   return 0;
866cee3aa6bSSatish Balay }
867cee3aa6bSSatish Balay 
8685615d1e5SSatish Balay #undef __FUNC__
8695615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
870ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
871cee3aa6bSSatish Balay {
872cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
873cee3aa6bSSatish Balay   int        ierr;
87443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
875cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
87643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
877cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
878cee3aa6bSSatish Balay   return 0;
879cee3aa6bSSatish Balay }
880cee3aa6bSSatish Balay 
8815615d1e5SSatish Balay #undef __FUNC__
8825615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
883ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
884cee3aa6bSSatish Balay {
885cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
886cee3aa6bSSatish Balay   int        ierr;
887cee3aa6bSSatish Balay 
888cee3aa6bSSatish Balay   /* do nondiagonal part */
889cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
890cee3aa6bSSatish Balay   /* send it on its way */
891537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
892cee3aa6bSSatish Balay   /* do local part */
893cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
894cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
895cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
896cee3aa6bSSatish Balay   /* but is not perhaps always true. */
897639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
898cee3aa6bSSatish Balay   return 0;
899cee3aa6bSSatish Balay }
900cee3aa6bSSatish Balay 
9015615d1e5SSatish Balay #undef __FUNC__
9025615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
903ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
904cee3aa6bSSatish Balay {
905cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
906cee3aa6bSSatish Balay   int        ierr;
907cee3aa6bSSatish Balay 
908cee3aa6bSSatish Balay   /* do nondiagonal part */
909cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
910cee3aa6bSSatish Balay   /* send it on its way */
911537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
912cee3aa6bSSatish Balay   /* do local part */
913cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
914cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
915cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
916cee3aa6bSSatish Balay   /* but is not perhaps always true. */
917537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
918cee3aa6bSSatish Balay   return 0;
919cee3aa6bSSatish Balay }
920cee3aa6bSSatish Balay 
921cee3aa6bSSatish Balay /*
922cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
923cee3aa6bSSatish Balay    diagonal block
924cee3aa6bSSatish Balay */
9255615d1e5SSatish Balay #undef __FUNC__
9265615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
927ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
928cee3aa6bSSatish Balay {
929cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
930cee3aa6bSSatish Balay   if (a->M != a->N)
931e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
932cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
933cee3aa6bSSatish Balay }
934cee3aa6bSSatish Balay 
9355615d1e5SSatish Balay #undef __FUNC__
9365615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
937ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
938cee3aa6bSSatish Balay {
939cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
940cee3aa6bSSatish Balay   int        ierr;
941cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
942cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
943cee3aa6bSSatish Balay   return 0;
944cee3aa6bSSatish Balay }
945026e39d0SSatish Balay 
9465615d1e5SSatish Balay #undef __FUNC__
9475615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
948ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
94957b952d6SSatish Balay {
95057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
95157b952d6SSatish Balay   *m = mat->M; *n = mat->N;
95257b952d6SSatish Balay   return 0;
95357b952d6SSatish Balay }
95457b952d6SSatish Balay 
9555615d1e5SSatish Balay #undef __FUNC__
9565615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
957ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
95857b952d6SSatish Balay {
95957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96057b952d6SSatish Balay   *m = mat->m; *n = mat->N;
96157b952d6SSatish Balay   return 0;
96257b952d6SSatish Balay }
96357b952d6SSatish Balay 
9645615d1e5SSatish Balay #undef __FUNC__
9655615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
966ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
96757b952d6SSatish Balay {
96857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96957b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
97057b952d6SSatish Balay   return 0;
97157b952d6SSatish Balay }
97257b952d6SSatish Balay 
973acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
974acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
975acdf5bf4SSatish Balay 
9765615d1e5SSatish Balay #undef __FUNC__
9775615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
978acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
979acdf5bf4SSatish Balay {
980acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
981acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
982acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
983d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
984d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
985acdf5bf4SSatish Balay 
986e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
987acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
988acdf5bf4SSatish Balay 
989acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
990acdf5bf4SSatish Balay     /*
991acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
992acdf5bf4SSatish Balay     */
993acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
994bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
995bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
996acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
997acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
998acdf5bf4SSatish Balay     }
999acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1000acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1001acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1002acdf5bf4SSatish Balay   }
1003acdf5bf4SSatish Balay 
1004acdf5bf4SSatish Balay 
1005e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1006d9d09a02SSatish Balay   lrow = row - brstart;
1007acdf5bf4SSatish Balay 
1008acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1009acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1010acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1011acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1012acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1013acdf5bf4SSatish Balay   nztot = nzA + nzB;
1014acdf5bf4SSatish Balay 
1015acdf5bf4SSatish Balay   cmap  = mat->garray;
1016acdf5bf4SSatish Balay   if (v  || idx) {
1017acdf5bf4SSatish Balay     if (nztot) {
1018acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1019acdf5bf4SSatish Balay       int imark = -1;
1020acdf5bf4SSatish Balay       if (v) {
1021acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1022acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1023d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1024acdf5bf4SSatish Balay           else break;
1025acdf5bf4SSatish Balay         }
1026acdf5bf4SSatish Balay         imark = i;
1027acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1028acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1029acdf5bf4SSatish Balay       }
1030acdf5bf4SSatish Balay       if (idx) {
1031acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1032acdf5bf4SSatish Balay         if (imark > -1) {
1033acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1034bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1035acdf5bf4SSatish Balay           }
1036acdf5bf4SSatish Balay         } else {
1037acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1038d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1039d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1040acdf5bf4SSatish Balay             else break;
1041acdf5bf4SSatish Balay           }
1042acdf5bf4SSatish Balay           imark = i;
1043acdf5bf4SSatish Balay         }
1044d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1045d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1046acdf5bf4SSatish Balay       }
1047acdf5bf4SSatish Balay     }
1048d212a18eSSatish Balay     else {
1049d212a18eSSatish Balay       if (idx) *idx = 0;
1050d212a18eSSatish Balay       if (v)   *v   = 0;
1051d212a18eSSatish Balay     }
1052acdf5bf4SSatish Balay   }
1053acdf5bf4SSatish Balay   *nz = nztot;
1054acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1055acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1056acdf5bf4SSatish Balay   return 0;
1057acdf5bf4SSatish Balay }
1058acdf5bf4SSatish Balay 
10595615d1e5SSatish Balay #undef __FUNC__
10605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1061acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1062acdf5bf4SSatish Balay {
1063acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1064acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1065e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1066acdf5bf4SSatish Balay   }
1067acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1068acdf5bf4SSatish Balay   return 0;
1069acdf5bf4SSatish Balay }
1070acdf5bf4SSatish Balay 
10715615d1e5SSatish Balay #undef __FUNC__
10725615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1073ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10745a838052SSatish Balay {
10755a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10765a838052SSatish Balay   *bs = baij->bs;
10775a838052SSatish Balay   return 0;
10785a838052SSatish Balay }
10795a838052SSatish Balay 
10805615d1e5SSatish Balay #undef __FUNC__
10815615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1082ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
108358667388SSatish Balay {
108458667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
108558667388SSatish Balay   int         ierr;
108658667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
108758667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
108858667388SSatish Balay   return 0;
108958667388SSatish Balay }
10900ac07820SSatish Balay 
10915615d1e5SSatish Balay #undef __FUNC__
10925615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1093ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10940ac07820SSatish Balay {
10954e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10964e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10977d57db60SLois Curfman McInnes   int         ierr;
10987d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10990ac07820SSatish Balay 
11004e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11014e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11024e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11034e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11044e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11054e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11064e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11074e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11084e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11090ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11104e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11114e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11124e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11134e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11144e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11150ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1116dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11174e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11184e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11194e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11204e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11214e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11220ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1123dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11244e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11254e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11264e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11274e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11284e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11290ac07820SSatish Balay   }
11304e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11314e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11324e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11330ac07820SSatish Balay   return 0;
11340ac07820SSatish Balay }
11350ac07820SSatish Balay 
11365615d1e5SSatish Balay #undef __FUNC__
11375615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1138ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
113958667388SSatish Balay {
114058667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
114158667388SSatish Balay 
114258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
114358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11446da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1145c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
114696854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1147c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1148b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1149b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1150b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1151aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
115258667388SSatish Balay         MatSetOption(a->A,op);
115358667388SSatish Balay         MatSetOption(a->B,op);
1154b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11556da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
115658667388SSatish Balay              op == MAT_SYMMETRIC ||
115758667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
115858667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
115958667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
116058667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
116158667388SSatish Balay     a->roworiented = 0;
116258667388SSatish Balay     MatSetOption(a->A,op);
116358667388SSatish Balay     MatSetOption(a->B,op);
11642b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
116590f02eecSBarry Smith     a->donotstash = 1;
116690f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1167e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
116858667388SSatish Balay   else
1169e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
117058667388SSatish Balay   return 0;
117158667388SSatish Balay }
117258667388SSatish Balay 
11735615d1e5SSatish Balay #undef __FUNC__
11745615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1175ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11760ac07820SSatish Balay {
11770ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11780ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11790ac07820SSatish Balay   Mat        B;
11800ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11810ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11820ac07820SSatish Balay   Scalar     *a;
11830ac07820SSatish Balay 
11840ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1185e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11860ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11870ac07820SSatish Balay   CHKERRQ(ierr);
11880ac07820SSatish Balay 
11890ac07820SSatish Balay   /* copy over the A part */
11900ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11910ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11920ac07820SSatish Balay   row = baij->rstart;
11930ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11940ac07820SSatish Balay 
11950ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11960ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11970ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11980ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11990ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12000ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12010ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12020ac07820SSatish Balay         col++; a += bs;
12030ac07820SSatish Balay       }
12040ac07820SSatish Balay     }
12050ac07820SSatish Balay   }
12060ac07820SSatish Balay   /* copy over the B part */
12070ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12080ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12090ac07820SSatish Balay   row = baij->rstart*bs;
12100ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12110ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12120ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12130ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12140ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12150ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12160ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12170ac07820SSatish Balay         col++; a += bs;
12180ac07820SSatish Balay       }
12190ac07820SSatish Balay     }
12200ac07820SSatish Balay   }
12210ac07820SSatish Balay   PetscFree(rvals);
12220ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12230ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12240ac07820SSatish Balay 
12250ac07820SSatish Balay   if (matout != PETSC_NULL) {
12260ac07820SSatish Balay     *matout = B;
12270ac07820SSatish Balay   } else {
12280ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12290ac07820SSatish Balay     PetscFree(baij->rowners);
12300ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12310ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12320ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12330ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12340ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12350ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12360ac07820SSatish Balay     PetscFree(baij);
12370ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
12380ac07820SSatish Balay     PetscHeaderDestroy(B);
12390ac07820SSatish Balay   }
12400ac07820SSatish Balay   return 0;
12410ac07820SSatish Balay }
12420e95ebc0SSatish Balay 
12435615d1e5SSatish Balay #undef __FUNC__
12445615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12450e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12460e95ebc0SSatish Balay {
12470e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12480e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12490e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12500e95ebc0SSatish Balay 
12510e95ebc0SSatish Balay   if (ll)  {
12520e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12530e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1254e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12550e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12560e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12570e95ebc0SSatish Balay   }
1258e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12590e95ebc0SSatish Balay   return 0;
12600e95ebc0SSatish Balay }
12610e95ebc0SSatish Balay 
12620ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12630ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12640ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12650ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12660ac07820SSatish Balay    routine.
12670ac07820SSatish Balay */
12685615d1e5SSatish Balay #undef __FUNC__
12695615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1270ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12710ac07820SSatish Balay {
12720ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12730ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12740ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12750ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12760ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12770ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12780ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12790ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12800ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12810ac07820SSatish Balay   IS             istmp;
12820ac07820SSatish Balay 
12830ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12840ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12850ac07820SSatish Balay 
12860ac07820SSatish Balay   /*  first count number of contributors to each processor */
12870ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12880ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12890ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12900ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12910ac07820SSatish Balay     idx = rows[i];
12920ac07820SSatish Balay     found = 0;
12930ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12940ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12950ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12960ac07820SSatish Balay       }
12970ac07820SSatish Balay     }
1298e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12990ac07820SSatish Balay   }
13000ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13010ac07820SSatish Balay 
13020ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13030ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13040ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13050ac07820SSatish Balay   nrecvs = work[rank];
13060ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13070ac07820SSatish Balay   nmax = work[rank];
13080ac07820SSatish Balay   PetscFree(work);
13090ac07820SSatish Balay 
13100ac07820SSatish Balay   /* post receives:   */
13110ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13120ac07820SSatish Balay   CHKPTRQ(rvalues);
13130ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13140ac07820SSatish Balay   CHKPTRQ(recv_waits);
13150ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13160ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13170ac07820SSatish Balay   }
13180ac07820SSatish Balay 
13190ac07820SSatish Balay   /* do sends:
13200ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13210ac07820SSatish Balay          the ith processor
13220ac07820SSatish Balay   */
13230ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13240ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13250ac07820SSatish Balay   CHKPTRQ(send_waits);
13260ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13270ac07820SSatish Balay   starts[0] = 0;
13280ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13290ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13300ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13310ac07820SSatish Balay   }
13320ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13330ac07820SSatish Balay 
13340ac07820SSatish Balay   starts[0] = 0;
13350ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13360ac07820SSatish Balay   count = 0;
13370ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13380ac07820SSatish Balay     if (procs[i]) {
13390ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13400ac07820SSatish Balay     }
13410ac07820SSatish Balay   }
13420ac07820SSatish Balay   PetscFree(starts);
13430ac07820SSatish Balay 
13440ac07820SSatish Balay   base = owners[rank]*bs;
13450ac07820SSatish Balay 
13460ac07820SSatish Balay   /*  wait on receives */
13470ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13480ac07820SSatish Balay   source = lens + nrecvs;
13490ac07820SSatish Balay   count  = nrecvs; slen = 0;
13500ac07820SSatish Balay   while (count) {
13510ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13520ac07820SSatish Balay     /* unpack receives into our local space */
13530ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13540ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13550ac07820SSatish Balay     lens[imdex]  = n;
13560ac07820SSatish Balay     slen += n;
13570ac07820SSatish Balay     count--;
13580ac07820SSatish Balay   }
13590ac07820SSatish Balay   PetscFree(recv_waits);
13600ac07820SSatish Balay 
13610ac07820SSatish Balay   /* move the data into the send scatter */
13620ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13630ac07820SSatish Balay   count = 0;
13640ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13650ac07820SSatish Balay     values = rvalues + i*nmax;
13660ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13670ac07820SSatish Balay       lrows[count++] = values[j] - base;
13680ac07820SSatish Balay     }
13690ac07820SSatish Balay   }
13700ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13710ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13720ac07820SSatish Balay 
13730ac07820SSatish Balay   /* actually zap the local rows */
1374029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13750ac07820SSatish Balay   PLogObjectParent(A,istmp);
13760ac07820SSatish Balay   PetscFree(lrows);
13770ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13780ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13790ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13800ac07820SSatish Balay 
13810ac07820SSatish Balay   /* wait on sends */
13820ac07820SSatish Balay   if (nsends) {
13830ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13840ac07820SSatish Balay     CHKPTRQ(send_status);
13850ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13860ac07820SSatish Balay     PetscFree(send_status);
13870ac07820SSatish Balay   }
13880ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13890ac07820SSatish Balay 
13900ac07820SSatish Balay   return 0;
13910ac07820SSatish Balay }
1392ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13935615d1e5SSatish Balay #undef __FUNC__
13945615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1395ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1396ba4ca20aSSatish Balay {
1397ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1398ba4ca20aSSatish Balay 
1399ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1400ba4ca20aSSatish Balay   else return 0;
1401ba4ca20aSSatish Balay }
14020ac07820SSatish Balay 
14035615d1e5SSatish Balay #undef __FUNC__
14045615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1405ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1406bb5a7306SBarry Smith {
1407bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1408bb5a7306SBarry Smith   int         ierr;
1409bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1410bb5a7306SBarry Smith   return 0;
1411bb5a7306SBarry Smith }
1412bb5a7306SBarry Smith 
14130ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14140ac07820SSatish Balay 
141579bdfe76SSatish Balay /* -------------------------------------------------------------------*/
141679bdfe76SSatish Balay static struct _MatOps MatOps = {
1417bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14184c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14194c50302cSBarry Smith   0,0,0,0,
14200ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14210e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
142258667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14234c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14244c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14254c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
142694a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1427d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1428ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1429bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1430ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
143179bdfe76SSatish Balay 
143279bdfe76SSatish Balay 
14335615d1e5SSatish Balay #undef __FUNC__
14345615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
143579bdfe76SSatish Balay /*@C
143679bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
143779bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
143879bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
143979bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
144079bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
144179bdfe76SSatish Balay 
144279bdfe76SSatish Balay    Input Parameters:
144379bdfe76SSatish Balay .  comm - MPI communicator
144479bdfe76SSatish Balay .  bs   - size of blockk
144579bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
144692e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
144792e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
144892e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
144992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145092e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
145179bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
145292e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
145379bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
145479bdfe76SSatish Balay            submatrix  (same for all local rows)
145592e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
145692e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
145792e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
145892e8d321SLois Curfman McInnes            it is zero.
145992e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
146079bdfe76SSatish Balay            submatrix (same for all local rows).
146192e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
146292e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
146392e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
146479bdfe76SSatish Balay 
146579bdfe76SSatish Balay    Output Parameter:
146679bdfe76SSatish Balay .  A - the matrix
146779bdfe76SSatish Balay 
146879bdfe76SSatish Balay    Notes:
146979bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
147079bdfe76SSatish Balay    (possibly both).
147179bdfe76SSatish Balay 
147279bdfe76SSatish Balay    Storage Information:
147379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
147479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
147579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
147679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
147779bdfe76SSatish Balay 
147879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
147979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
148079bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
148179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
148279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
148379bdfe76SSatish Balay 
148479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
148579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
148679bdfe76SSatish Balay 
148779bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
148879bdfe76SSatish Balay $         -------------------
148979bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
149079bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
149179bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
149279bdfe76SSatish Balay $         -------------------
149379bdfe76SSatish Balay $
149479bdfe76SSatish Balay 
149579bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
149679bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
149779bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
149857b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
149979bdfe76SSatish Balay 
150079bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
150179bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
150279bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
150392e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
150492e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15056da5968aSLois Curfman McInnes    matrices.
150679bdfe76SSatish Balay 
150792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
150879bdfe76SSatish Balay 
150979bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
151079bdfe76SSatish Balay @*/
151179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
151279bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
151379bdfe76SSatish Balay {
151479bdfe76SSatish Balay   Mat          B;
151579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15164c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
151779bdfe76SSatish Balay 
1518e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
151979bdfe76SSatish Balay   *A = 0;
152079bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
152179bdfe76SSatish Balay   PLogObjectCreate(B);
152279bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
152379bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
152479bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15254c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15264c50302cSBarry Smith   if (size == 1) {
15274c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15284c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15294c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15304c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15314c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15324c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15334c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15344c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15354c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15364c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15374c50302cSBarry Smith   }
15384c50302cSBarry Smith 
153979bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
154079bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
154190f02eecSBarry Smith   B->mapping    = 0;
154279bdfe76SSatish Balay   B->factor     = 0;
154379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
154479bdfe76SSatish Balay 
1545e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
154679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
154779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
154879bdfe76SSatish Balay 
154979bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1550e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1551e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1552e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1553cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1554cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
155579bdfe76SSatish Balay 
155679bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
155779bdfe76SSatish Balay     work[0] = m; work[1] = n;
155879bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
155979bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
156079bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
156179bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
156279bdfe76SSatish Balay   }
156379bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
156479bdfe76SSatish Balay     Mbs = M/bs;
1565e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
156679bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
156779bdfe76SSatish Balay     m   = mbs*bs;
156879bdfe76SSatish Balay   }
156979bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
157079bdfe76SSatish Balay     Nbs = N/bs;
1571e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
157279bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
157379bdfe76SSatish Balay     n   = nbs*bs;
157479bdfe76SSatish Balay   }
1575e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
157679bdfe76SSatish Balay 
157779bdfe76SSatish Balay   b->m = m; B->m = m;
157879bdfe76SSatish Balay   b->n = n; B->n = n;
157979bdfe76SSatish Balay   b->N = N; B->N = N;
158079bdfe76SSatish Balay   b->M = M; B->M = M;
158179bdfe76SSatish Balay   b->bs  = bs;
158279bdfe76SSatish Balay   b->bs2 = bs*bs;
158379bdfe76SSatish Balay   b->mbs = mbs;
158479bdfe76SSatish Balay   b->nbs = nbs;
158579bdfe76SSatish Balay   b->Mbs = Mbs;
158679bdfe76SSatish Balay   b->Nbs = Nbs;
158779bdfe76SSatish Balay 
158879bdfe76SSatish Balay   /* build local table of row and column ownerships */
158979bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
159079bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15910ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
159279bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
159379bdfe76SSatish Balay   b->rowners[0] = 0;
159479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
159579bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
159679bdfe76SSatish Balay   }
159779bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
159879bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15994fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16004fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16014fa0d573SSatish Balay 
160279bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
160379bdfe76SSatish Balay   b->cowners[0] = 0;
160479bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
160579bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
160679bdfe76SSatish Balay   }
160779bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
160879bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16094fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16104fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
161179bdfe76SSatish Balay 
161279bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1613029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
161479bdfe76SSatish Balay   PLogObjectParent(B,b->A);
161579bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1616029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
161779bdfe76SSatish Balay   PLogObjectParent(B,b->B);
161879bdfe76SSatish Balay 
161979bdfe76SSatish Balay   /* build cache for off array entries formed */
162079bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
162190f02eecSBarry Smith   b->donotstash  = 0;
162279bdfe76SSatish Balay   b->colmap      = 0;
162379bdfe76SSatish Balay   b->garray      = 0;
162479bdfe76SSatish Balay   b->roworiented = 1;
162579bdfe76SSatish Balay 
162630793edcSSatish Balay   /* stuff used in block assembly */
162730793edcSSatish Balay   b->barray      = 0;
162830793edcSSatish Balay 
162979bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
163079bdfe76SSatish Balay   b->lvec        = 0;
163179bdfe76SSatish Balay   b->Mvctx       = 0;
163279bdfe76SSatish Balay 
163379bdfe76SSatish Balay   /* stuff for MatGetRow() */
163479bdfe76SSatish Balay   b->rowindices   = 0;
163579bdfe76SSatish Balay   b->rowvalues    = 0;
163679bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
163779bdfe76SSatish Balay 
163879bdfe76SSatish Balay   *A = B;
163979bdfe76SSatish Balay   return 0;
164079bdfe76SSatish Balay }
1641026e39d0SSatish Balay 
16425615d1e5SSatish Balay #undef __FUNC__
16435615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16440ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16450ac07820SSatish Balay {
16460ac07820SSatish Balay   Mat         mat;
16470ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16480ac07820SSatish Balay   int         ierr, len=0, flg;
16490ac07820SSatish Balay 
16500ac07820SSatish Balay   *newmat       = 0;
16510ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16520ac07820SSatish Balay   PLogObjectCreate(mat);
16530ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16540ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16550ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16560ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16570ac07820SSatish Balay   mat->factor     = matin->factor;
16580ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16590ac07820SSatish Balay 
16600ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16610ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16620ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16630ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16640ac07820SSatish Balay 
16650ac07820SSatish Balay   a->bs  = oldmat->bs;
16660ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16670ac07820SSatish Balay   a->mbs = oldmat->mbs;
16680ac07820SSatish Balay   a->nbs = oldmat->nbs;
16690ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16700ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16710ac07820SSatish Balay 
16720ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16730ac07820SSatish Balay   a->rend         = oldmat->rend;
16740ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16750ac07820SSatish Balay   a->cend         = oldmat->cend;
16760ac07820SSatish Balay   a->size         = oldmat->size;
16770ac07820SSatish Balay   a->rank         = oldmat->rank;
1678e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16790ac07820SSatish Balay   a->rowvalues    = 0;
16800ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
168130793edcSSatish Balay   a->barray       = 0;
16820ac07820SSatish Balay 
16830ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
16840ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
16850ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16860ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16870ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16880ac07820SSatish Balay   if (oldmat->colmap) {
16890ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16900ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16910ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16920ac07820SSatish Balay   } else a->colmap = 0;
16930ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16940ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16950ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16960ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16970ac07820SSatish Balay   } else a->garray = 0;
16980ac07820SSatish Balay 
16990ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17000ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17010ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17020ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17030ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17040ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17050ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17060ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17070ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17080ac07820SSatish Balay   if (flg) {
17090ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17100ac07820SSatish Balay   }
17110ac07820SSatish Balay   *newmat = mat;
17120ac07820SSatish Balay   return 0;
17130ac07820SSatish Balay }
171457b952d6SSatish Balay 
171557b952d6SSatish Balay #include "sys.h"
171657b952d6SSatish Balay 
17175615d1e5SSatish Balay #undef __FUNC__
17185615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
171957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
172057b952d6SSatish Balay {
172157b952d6SSatish Balay   Mat          A;
172257b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
172357b952d6SSatish Balay   Scalar       *vals,*buf;
172457b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
172557b952d6SSatish Balay   MPI_Status   status;
1726cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
172757b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
172857b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
172957b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
173057b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
173157b952d6SSatish Balay 
173257b952d6SSatish Balay 
173357b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
173457b952d6SSatish Balay   bs2  = bs*bs;
173557b952d6SSatish Balay 
173657b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
173757b952d6SSatish Balay   if (!rank) {
173857b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
173957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1740e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
174157b952d6SSatish Balay   }
174257b952d6SSatish Balay 
174357b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
174457b952d6SSatish Balay   M = header[1]; N = header[2];
174557b952d6SSatish Balay 
1746e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
174757b952d6SSatish Balay 
174857b952d6SSatish Balay   /*
174957b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
175057b952d6SSatish Balay      divisible by the blocksize
175157b952d6SSatish Balay   */
175257b952d6SSatish Balay   Mbs        = M/bs;
175357b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
175457b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
175557b952d6SSatish Balay   else                  Mbs++;
175657b952d6SSatish Balay   if (extra_rows &&!rank) {
1757b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
175857b952d6SSatish Balay   }
1759537820f0SBarry Smith 
176057b952d6SSatish Balay   /* determine ownership of all rows */
176157b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
176257b952d6SSatish Balay   m   = mbs * bs;
1763cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1764cee3aa6bSSatish Balay   browners = rowners + size + 1;
176557b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
176657b952d6SSatish Balay   rowners[0] = 0;
1767cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1768cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
176957b952d6SSatish Balay   rstart = rowners[rank];
177057b952d6SSatish Balay   rend   = rowners[rank+1];
177157b952d6SSatish Balay 
177257b952d6SSatish Balay   /* distribute row lengths to all processors */
177357b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
177457b952d6SSatish Balay   if (!rank) {
177557b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
177657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
177757b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
177857b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1779cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1780cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
178157b952d6SSatish Balay     PetscFree(sndcounts);
178257b952d6SSatish Balay   }
178357b952d6SSatish Balay   else {
178457b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
178557b952d6SSatish Balay   }
178657b952d6SSatish Balay 
178757b952d6SSatish Balay   if (!rank) {
178857b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
178957b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
179057b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
179157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
179257b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
179357b952d6SSatish Balay         procsnz[i] += rowlengths[j];
179457b952d6SSatish Balay       }
179557b952d6SSatish Balay     }
179657b952d6SSatish Balay     PetscFree(rowlengths);
179757b952d6SSatish Balay 
179857b952d6SSatish Balay     /* determine max buffer needed and allocate it */
179957b952d6SSatish Balay     maxnz = 0;
180057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
180157b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
180257b952d6SSatish Balay     }
180357b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
180457b952d6SSatish Balay 
180557b952d6SSatish Balay     /* read in my part of the matrix column indices  */
180657b952d6SSatish Balay     nz = procsnz[0];
180757b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
180857b952d6SSatish Balay     mycols = ibuf;
1809cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
181057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1811cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1812cee3aa6bSSatish Balay 
181357b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
181457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
181557b952d6SSatish Balay       nz = procsnz[i];
181657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
181757b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
181857b952d6SSatish Balay     }
181957b952d6SSatish Balay     /* read in the stuff for the last proc */
182057b952d6SSatish Balay     if ( size != 1 ) {
182157b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
182257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
182357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1824cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
182557b952d6SSatish Balay     }
182657b952d6SSatish Balay     PetscFree(cols);
182757b952d6SSatish Balay   }
182857b952d6SSatish Balay   else {
182957b952d6SSatish Balay     /* determine buffer space needed for message */
183057b952d6SSatish Balay     nz = 0;
183157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
183257b952d6SSatish Balay       nz += locrowlens[i];
183357b952d6SSatish Balay     }
183457b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
183557b952d6SSatish Balay     mycols = ibuf;
183657b952d6SSatish Balay     /* receive message of column indices*/
183757b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
183857b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1839e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
184057b952d6SSatish Balay   }
184157b952d6SSatish Balay 
184257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1843cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1844cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
184557b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1846cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
184757b952d6SSatish Balay   masked1 = mask    + Mbs;
184857b952d6SSatish Balay   masked2 = masked1 + Mbs;
184957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
185057b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
185157b952d6SSatish Balay     dcount  = 0;
185257b952d6SSatish Balay     odcount = 0;
185357b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
185457b952d6SSatish Balay       kmax = locrowlens[rowcount];
185557b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
185657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
185757b952d6SSatish Balay         if (!mask[tmp]) {
185857b952d6SSatish Balay           mask[tmp] = 1;
185957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
186057b952d6SSatish Balay           else masked1[dcount++] = tmp;
186157b952d6SSatish Balay         }
186257b952d6SSatish Balay       }
186357b952d6SSatish Balay       rowcount++;
186457b952d6SSatish Balay     }
1865cee3aa6bSSatish Balay 
186657b952d6SSatish Balay     dlens[i]  = dcount;
186757b952d6SSatish Balay     odlens[i] = odcount;
1868cee3aa6bSSatish Balay 
186957b952d6SSatish Balay     /* zero out the mask elements we set */
187057b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
187157b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
187257b952d6SSatish Balay   }
1873cee3aa6bSSatish Balay 
187457b952d6SSatish Balay   /* create our matrix */
1875537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1876537820f0SBarry Smith          CHKERRQ(ierr);
187757b952d6SSatish Balay   A = *newmat;
18786d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
187957b952d6SSatish Balay 
188057b952d6SSatish Balay   if (!rank) {
188157b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
188257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
188357b952d6SSatish Balay     nz = procsnz[0];
188457b952d6SSatish Balay     vals = buf;
1885cee3aa6bSSatish Balay     mycols = ibuf;
1886cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
188757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1888cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1889537820f0SBarry Smith 
189057b952d6SSatish Balay     /* insert into matrix */
189157b952d6SSatish Balay     jj      = rstart*bs;
189257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
189357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
189457b952d6SSatish Balay       mycols += locrowlens[i];
189557b952d6SSatish Balay       vals   += locrowlens[i];
189657b952d6SSatish Balay       jj++;
189757b952d6SSatish Balay     }
189857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
189957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
190057b952d6SSatish Balay       nz = procsnz[i];
190157b952d6SSatish Balay       vals = buf;
190257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
190357b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
190457b952d6SSatish Balay     }
190557b952d6SSatish Balay     /* the last proc */
190657b952d6SSatish Balay     if ( size != 1 ){
190757b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1908cee3aa6bSSatish Balay       vals = buf;
190957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
191057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1911cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
191257b952d6SSatish Balay     }
191357b952d6SSatish Balay     PetscFree(procsnz);
191457b952d6SSatish Balay   }
191557b952d6SSatish Balay   else {
191657b952d6SSatish Balay     /* receive numeric values */
191757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
191857b952d6SSatish Balay 
191957b952d6SSatish Balay     /* receive message of values*/
192057b952d6SSatish Balay     vals = buf;
1921cee3aa6bSSatish Balay     mycols = ibuf;
192257b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
192357b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1924e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
192557b952d6SSatish Balay 
192657b952d6SSatish Balay     /* insert into matrix */
192757b952d6SSatish Balay     jj      = rstart*bs;
1928cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
192957b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
193057b952d6SSatish Balay       mycols += locrowlens[i];
193157b952d6SSatish Balay       vals   += locrowlens[i];
193257b952d6SSatish Balay       jj++;
193357b952d6SSatish Balay     }
193457b952d6SSatish Balay   }
193557b952d6SSatish Balay   PetscFree(locrowlens);
193657b952d6SSatish Balay   PetscFree(buf);
193757b952d6SSatish Balay   PetscFree(ibuf);
193857b952d6SSatish Balay   PetscFree(rowners);
193957b952d6SSatish Balay   PetscFree(dlens);
1940cee3aa6bSSatish Balay   PetscFree(mask);
19416d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19426d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
194357b952d6SSatish Balay   return 0;
194457b952d6SSatish Balay }
194557b952d6SSatish Balay 
194657b952d6SSatish Balay 
1947