xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 96854ed6ad2ded53123cd28a73515071866c9e4e)
179bdfe76SSatish Balay #ifndef lint
2*96854ed6SLois Curfman McInnes static char vcid[] = "$Id: mpibaij.c,v 1.66 1997/04/10 00:03:33 bsmith Exp curfman $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
95ac7a638eSSatish Balay       if (a->nonew) goto a_noinsert; \
9680c1aa95SSatish Balay       if (nrow >= rmax) { \
9780c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9880c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9980c1aa95SSatish Balay         Scalar *new_a; \
10080c1aa95SSatish Balay  \
10180c1aa95SSatish Balay         /* malloc new storage space */ \
10280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10680c1aa95SSatish Balay  \
10780c1aa95SSatish Balay         /* copy over old data into new slots */ \
10880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11380c1aa95SSatish Balay                                                            len*sizeof(int)); \
11480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11880c1aa95SSatish Balay         /* free up old matrix storage */ \
11980c1aa95SSatish Balay         PetscFree(a->a);  \
12080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12280c1aa95SSatish Balay         a->singlemalloc = 1; \
12380c1aa95SSatish Balay  \
12480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
125ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12880c1aa95SSatish Balay         a->reallocs++; \
12980c1aa95SSatish Balay         a->nz++; \
13080c1aa95SSatish Balay       } \
13180c1aa95SSatish Balay       N = nrow++ - 1;  \
13280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13680c1aa95SSatish Balay       } \
13780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
140ac7a638eSSatish Balay       a_noinsert:; \
14180c1aa95SSatish Balay     ailen[brow] = nrow; \
14280c1aa95SSatish Balay }
14357b952d6SSatish Balay 
144ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
145ac7a638eSSatish Balay { \
146ac7a638eSSatish Balay  \
147ac7a638eSSatish Balay     brow = row/bs;  \
148ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
149ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
150ac7a638eSSatish Balay       bcol = col/bs; \
151ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
152ac7a638eSSatish Balay       low = 0; high = nrow; \
153ac7a638eSSatish Balay       while (high-low > 3) { \
154ac7a638eSSatish Balay         t = (low+high)/2; \
155ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
156ac7a638eSSatish Balay         else              low  = t; \
157ac7a638eSSatish Balay       } \
158ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
159ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
160ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
161ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
162ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
163ac7a638eSSatish Balay           else                    *bap  = value;  \
164ac7a638eSSatish Balay           goto b_noinsert; \
165ac7a638eSSatish Balay         } \
166ac7a638eSSatish Balay       } \
16774c639caSSatish Balay       if (b->nonew) goto b_noinsert; \
168ac7a638eSSatish Balay       if (nrow >= rmax) { \
169ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17074c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
171ac7a638eSSatish Balay         Scalar *new_a; \
172ac7a638eSSatish Balay  \
173ac7a638eSSatish Balay         /* malloc new storage space */ \
17474c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
175ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
176ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
177ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
178ac7a638eSSatish Balay  \
179ac7a638eSSatish Balay         /* copy over old data into new slots */ \
180ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18174c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
182ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
183ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
184ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
185ac7a638eSSatish Balay                                                            len*sizeof(int)); \
186ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
187ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
188ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
189ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
190ac7a638eSSatish Balay         /* free up old matrix storage */ \
19174c639caSSatish Balay         PetscFree(b->a);  \
19274c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19374c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
19474c639caSSatish Balay         b->singlemalloc = 1; \
195ac7a638eSSatish Balay  \
196ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
197ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
19874c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
19974c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20074c639caSSatish Balay         b->reallocs++; \
20174c639caSSatish Balay         b->nz++; \
202ac7a638eSSatish Balay       } \
203ac7a638eSSatish Balay       N = nrow++ - 1;  \
204ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
205ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
206ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
207ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
210ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
211ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
212ac7a638eSSatish Balay       b_noinsert:; \
213ac7a638eSSatish Balay     bilen[brow] = nrow; \
214ac7a638eSSatish Balay }
215ac7a638eSSatish Balay 
2165615d1e5SSatish Balay #undef __FUNC__
2175615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
218ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
21957b952d6SSatish Balay {
22057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22157b952d6SSatish Balay   Scalar      value;
2224fa0d573SSatish Balay   int         ierr,i,j,row,col;
2234fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2244fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2254fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
22657b952d6SSatish Balay 
227eada6651SSatish Balay   /* Some Variables required in the macro */
22880c1aa95SSatish Balay   Mat         A = baij->A;
22980c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
230ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
231ac7a638eSSatish Balay   Scalar      *aa=a->a;
232ac7a638eSSatish Balay 
233ac7a638eSSatish Balay   Mat         B = baij->B;
234ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
235ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
236ac7a638eSSatish Balay   Scalar      *ba=b->a;
237ac7a638eSSatish Balay 
238ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
239ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
240ac7a638eSSatish Balay   Scalar      *ap,*bap;
24180c1aa95SSatish Balay 
24257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
243639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
244e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
245e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
246639f9d9dSBarry Smith #endif
24757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
24857b952d6SSatish Balay       row = im[i] - rstart_orig;
24957b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25157b952d6SSatish Balay           col = in[j] - cstart_orig;
25257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
253f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
25480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25557b952d6SSatish Balay         }
256639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
257e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
258e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
259639f9d9dSBarry Smith #endif
26057b952d6SSatish Balay         else {
26157b952d6SSatish Balay           if (mat->was_assembled) {
262905e6a2fSBarry Smith             if (!baij->colmap) {
263905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
264905e6a2fSBarry Smith             }
265905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
26657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
26757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26857b952d6SSatish Balay               col =  in[j];
26957b952d6SSatish Balay             }
27057b952d6SSatish Balay           }
27157b952d6SSatish Balay           else col = in[j];
27257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
274ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27557b952d6SSatish Balay         }
27657b952d6SSatish Balay       }
27757b952d6SSatish Balay     }
27857b952d6SSatish Balay     else {
27990f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
28057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
28157b952d6SSatish Balay       }
28257b952d6SSatish Balay       else {
28390f02eecSBarry Smith         if (!baij->donotstash) {
28457b952d6SSatish Balay           row = im[i];
28557b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
28657b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
28757b952d6SSatish Balay           }
28857b952d6SSatish Balay         }
28957b952d6SSatish Balay       }
29057b952d6SSatish Balay     }
29190f02eecSBarry Smith   }
29257b952d6SSatish Balay   return 0;
29357b952d6SSatish Balay }
29457b952d6SSatish Balay 
295ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
296ab26458aSBarry Smith #undef __FUNC__
297ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
298ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
299ab26458aSBarry Smith {
300ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30130793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
302abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
303ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
304ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
305ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
306ab26458aSBarry Smith 
30730793edcSSatish Balay 
30830793edcSSatish Balay   if(!barray) {
30930793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
31030793edcSSatish Balay     baij->barray = barray;
31130793edcSSatish Balay   }
31230793edcSSatish Balay 
313ab26458aSBarry Smith   if (roworiented) {
314ab26458aSBarry Smith     stepval = (n-1)*bs;
315ab26458aSBarry Smith   } else {
316ab26458aSBarry Smith     stepval = (m-1)*bs;
317ab26458aSBarry Smith   }
318ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
319ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
320ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
321ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
322ab26458aSBarry Smith #endif
323ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
324ab26458aSBarry Smith       row = im[i] - rstart;
325ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
32615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
32715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
32815b57d14SSatish Balay           barray = v + i*bs2;
32915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
33015b57d14SSatish Balay           barray = v + j*bs2;
33115b57d14SSatish Balay         } else { /* Here a copy is required */
332ab26458aSBarry Smith           if (roworiented) {
333ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
334ab26458aSBarry Smith           } else {
335ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
336abef11f7SSatish Balay           }
337ab26458aSBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval )
338ab26458aSBarry Smith             for (jj=0; jj<bs; jj++ )
33930793edcSSatish Balay               *barray++  = *value++;
34030793edcSSatish Balay           barray -=bs2;
34115b57d14SSatish Balay         }
342abef11f7SSatish Balay 
343abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
344abef11f7SSatish Balay           col = in[j] - cstart;
34530793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346ab26458aSBarry Smith         }
347ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
348ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
349ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
350ab26458aSBarry Smith #endif
351ab26458aSBarry Smith         else {
352ab26458aSBarry Smith           if (mat->was_assembled) {
353ab26458aSBarry Smith             if (!baij->colmap) {
354ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
355ab26458aSBarry Smith             }
356ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
357ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
359ab26458aSBarry Smith               col =  in[j];
360ab26458aSBarry Smith             }
361ab26458aSBarry Smith           }
362ab26458aSBarry Smith           else col = in[j];
36330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364ab26458aSBarry Smith         }
365ab26458aSBarry Smith       }
366ab26458aSBarry Smith     }
367ab26458aSBarry Smith     else {
368ab26458aSBarry Smith       if (!baij->donotstash) {
369ab26458aSBarry Smith         if (roworiented ) {
370abef11f7SSatish Balay           row   = im[i]*bs;
371abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
372abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
373abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
374abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
375abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
376abef11f7SSatish Balay               }
377ab26458aSBarry Smith             }
378ab26458aSBarry Smith           }
379ab26458aSBarry Smith         }
380ab26458aSBarry Smith         else {
381ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
382abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
383abef11f7SSatish Balay             col   = in[j]*bs;
384abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
385abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
386abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
387ab26458aSBarry Smith               }
388ab26458aSBarry Smith             }
389ab26458aSBarry Smith           }
390abef11f7SSatish Balay 
391abef11f7SSatish Balay         }
392abef11f7SSatish Balay       }
393ab26458aSBarry Smith     }
394ab26458aSBarry Smith   }
395ab26458aSBarry Smith   return 0;
396ab26458aSBarry Smith }
397ab26458aSBarry Smith 
3985615d1e5SSatish Balay #undef __FUNC__
3995615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
400ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
401d6de1c52SSatish Balay {
402d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
403d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
404d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
405d6de1c52SSatish Balay 
406d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
407e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
408e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
409d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
410d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
411d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
412e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
413e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
414d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
415d6de1c52SSatish Balay           col = idxn[j] - bscstart;
416d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
417d6de1c52SSatish Balay         }
418d6de1c52SSatish Balay         else {
419905e6a2fSBarry Smith           if (!baij->colmap) {
420905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421905e6a2fSBarry Smith           }
422e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
423dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
424d9d09a02SSatish Balay           else {
425dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
426d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
427d6de1c52SSatish Balay           }
428d6de1c52SSatish Balay         }
429d6de1c52SSatish Balay       }
430d9d09a02SSatish Balay     }
431d6de1c52SSatish Balay     else {
432e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
433d6de1c52SSatish Balay     }
434d6de1c52SSatish Balay   }
435d6de1c52SSatish Balay   return 0;
436d6de1c52SSatish Balay }
437d6de1c52SSatish Balay 
4385615d1e5SSatish Balay #undef __FUNC__
4395615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
440ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
441d6de1c52SSatish Balay {
442d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
443d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
444acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
445d6de1c52SSatish Balay   double     sum = 0.0;
446d6de1c52SSatish Balay   Scalar     *v;
447d6de1c52SSatish Balay 
448d6de1c52SSatish Balay   if (baij->size == 1) {
449d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
450d6de1c52SSatish Balay   } else {
451d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
452d6de1c52SSatish Balay       v = amat->a;
453d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
454d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
455d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
456d6de1c52SSatish Balay #else
457d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
458d6de1c52SSatish Balay #endif
459d6de1c52SSatish Balay       }
460d6de1c52SSatish Balay       v = bmat->a;
461d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
462d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
463d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
464d6de1c52SSatish Balay #else
465d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
466d6de1c52SSatish Balay #endif
467d6de1c52SSatish Balay       }
468d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
469d6de1c52SSatish Balay       *norm = sqrt(*norm);
470d6de1c52SSatish Balay     }
471acdf5bf4SSatish Balay     else
472e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
473d6de1c52SSatish Balay   }
474d6de1c52SSatish Balay   return 0;
475d6de1c52SSatish Balay }
47657b952d6SSatish Balay 
4775615d1e5SSatish Balay #undef __FUNC__
4785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
479ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
48057b952d6SSatish Balay {
48157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
48257b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
48357b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
48457b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
48557b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
48657b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
48757b952d6SSatish Balay   InsertMode  addv;
48857b952d6SSatish Balay   Scalar      *rvalues,*svalues;
48957b952d6SSatish Balay 
49057b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
491e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
49257b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
493e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
49457b952d6SSatish Balay   }
495e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
49657b952d6SSatish Balay 
49757b952d6SSatish Balay   /*  first count number of contributors to each processor */
49857b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
49957b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
50057b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
50157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
50257b952d6SSatish Balay     idx = baij->stash.idx[i];
50357b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
50457b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
50557b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
50657b952d6SSatish Balay       }
50757b952d6SSatish Balay     }
50857b952d6SSatish Balay   }
50957b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
51057b952d6SSatish Balay 
51157b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
51257b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
51357b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
51457b952d6SSatish Balay   nreceives = work[rank];
51557b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
51657b952d6SSatish Balay   nmax = work[rank];
51757b952d6SSatish Balay   PetscFree(work);
51857b952d6SSatish Balay 
51957b952d6SSatish Balay   /* post receives:
52057b952d6SSatish Balay        1) each message will consist of ordered pairs
52157b952d6SSatish Balay      (global index,value) we store the global index as a double
52257b952d6SSatish Balay      to simplify the message passing.
52357b952d6SSatish Balay        2) since we don't know how long each individual message is we
52457b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
52557b952d6SSatish Balay      this is a lot of wasted space.
52657b952d6SSatish Balay 
52757b952d6SSatish Balay 
52857b952d6SSatish Balay        This could be done better.
52957b952d6SSatish Balay   */
53057b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
53157b952d6SSatish Balay   CHKPTRQ(rvalues);
53257b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
53357b952d6SSatish Balay   CHKPTRQ(recv_waits);
53457b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
53557b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
53657b952d6SSatish Balay               comm,recv_waits+i);
53757b952d6SSatish Balay   }
53857b952d6SSatish Balay 
53957b952d6SSatish Balay   /* do sends:
54057b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
54157b952d6SSatish Balay          the ith processor
54257b952d6SSatish Balay   */
54357b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
54457b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
54557b952d6SSatish Balay   CHKPTRQ(send_waits);
54657b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
54757b952d6SSatish Balay   starts[0] = 0;
54857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
54957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
55057b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
55157b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
55257b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
55357b952d6SSatish Balay   }
55457b952d6SSatish Balay   PetscFree(owner);
55557b952d6SSatish Balay   starts[0] = 0;
55657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
55757b952d6SSatish Balay   count = 0;
55857b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
55957b952d6SSatish Balay     if (procs[i]) {
56057b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
56157b952d6SSatish Balay                 comm,send_waits+count++);
56257b952d6SSatish Balay     }
56357b952d6SSatish Balay   }
56457b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
56557b952d6SSatish Balay 
56657b952d6SSatish Balay   /* Free cache space */
567d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
56857b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
56957b952d6SSatish Balay 
57057b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
57157b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
57257b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
57357b952d6SSatish Balay   baij->rmax       = nmax;
57457b952d6SSatish Balay 
57557b952d6SSatish Balay   return 0;
57657b952d6SSatish Balay }
57757b952d6SSatish Balay 
57857b952d6SSatish Balay 
5795615d1e5SSatish Balay #undef __FUNC__
5805615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
581ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
58257b952d6SSatish Balay {
58357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
58457b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
58557b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
58657b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
58757b952d6SSatish Balay   Scalar      *values,val;
588e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   /*  wait on receives */
59157b952d6SSatish Balay   while (count) {
59257b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
59357b952d6SSatish Balay     /* unpack receives into our local space */
59457b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
59557b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
59657b952d6SSatish Balay     n = n/3;
59757b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
59857b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
59957b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
60057b952d6SSatish Balay       val = values[3*i+2];
60157b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
60257b952d6SSatish Balay         col -= baij->cstart*bs;
60357b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
60457b952d6SSatish Balay       }
60557b952d6SSatish Balay       else {
60657b952d6SSatish Balay         if (mat->was_assembled) {
607905e6a2fSBarry Smith           if (!baij->colmap) {
608905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
609905e6a2fSBarry Smith           }
610905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
61157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
61257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
61357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
61457b952d6SSatish Balay           }
61557b952d6SSatish Balay         }
61657b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
61757b952d6SSatish Balay       }
61857b952d6SSatish Balay     }
61957b952d6SSatish Balay     count--;
62057b952d6SSatish Balay   }
62157b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
62257b952d6SSatish Balay 
62357b952d6SSatish Balay   /* wait on sends */
62457b952d6SSatish Balay   if (baij->nsends) {
62557b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
62657b952d6SSatish Balay     CHKPTRQ(send_status);
62757b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
62857b952d6SSatish Balay     PetscFree(send_status);
62957b952d6SSatish Balay   }
63057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
63157b952d6SSatish Balay 
63257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
63357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
63457b952d6SSatish Balay 
63557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
63657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
63757b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
63857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
63957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
64057b952d6SSatish Balay   }
64157b952d6SSatish Balay 
6426d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
64357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
64457b952d6SSatish Balay   }
64557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
64657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
64757b952d6SSatish Balay 
64857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
64957b952d6SSatish Balay   return 0;
65057b952d6SSatish Balay }
65157b952d6SSatish Balay 
6525615d1e5SSatish Balay #undef __FUNC__
6535615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
65457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
65557b952d6SSatish Balay {
65657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
65757b952d6SSatish Balay   int          ierr;
65857b952d6SSatish Balay 
65957b952d6SSatish Balay   if (baij->size == 1) {
66057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
66157b952d6SSatish Balay   }
662e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
66357b952d6SSatish Balay   return 0;
66457b952d6SSatish Balay }
66557b952d6SSatish Balay 
6665615d1e5SSatish Balay #undef __FUNC__
6675615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
66857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
66957b952d6SSatish Balay {
67057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
671cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
67257b952d6SSatish Balay   FILE         *fd;
67357b952d6SSatish Balay   ViewerType   vtype;
67457b952d6SSatish Balay 
67557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
67657b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
67757b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
678639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6794e220ebcSLois Curfman McInnes       MatInfo info;
68057b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
68157b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6824e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
68357b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
68457b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6854e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6864e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6874e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6884e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6894e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6904e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
69157b952d6SSatish Balay       fflush(fd);
69257b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
69357b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
69457b952d6SSatish Balay       return 0;
69557b952d6SSatish Balay     }
696639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
697bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
69857b952d6SSatish Balay       return 0;
69957b952d6SSatish Balay     }
70057b952d6SSatish Balay   }
70157b952d6SSatish Balay 
70257b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
70357b952d6SSatish Balay     Draw       draw;
70457b952d6SSatish Balay     PetscTruth isnull;
70557b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
70657b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
70757b952d6SSatish Balay   }
70857b952d6SSatish Balay 
70957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
71057b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
71157b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
71257b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
71357b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
71457b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
71557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
71657b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
71757b952d6SSatish Balay     fflush(fd);
71857b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
71957b952d6SSatish Balay   }
72057b952d6SSatish Balay   else {
72157b952d6SSatish Balay     int size = baij->size;
72257b952d6SSatish Balay     rank = baij->rank;
72357b952d6SSatish Balay     if (size == 1) {
72457b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
72557b952d6SSatish Balay     }
72657b952d6SSatish Balay     else {
72757b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
72857b952d6SSatish Balay       Mat         A;
72957b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
73057b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
73157b952d6SSatish Balay       int         mbs=baij->mbs;
73257b952d6SSatish Balay       Scalar      *a;
73357b952d6SSatish Balay 
73457b952d6SSatish Balay       if (!rank) {
735cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
73657b952d6SSatish Balay         CHKERRQ(ierr);
73757b952d6SSatish Balay       }
73857b952d6SSatish Balay       else {
739cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
74057b952d6SSatish Balay         CHKERRQ(ierr);
74157b952d6SSatish Balay       }
74257b952d6SSatish Balay       PLogObjectParent(mat,A);
74357b952d6SSatish Balay 
74457b952d6SSatish Balay       /* copy over the A part */
74557b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
74657b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
74757b952d6SSatish Balay       row = baij->rstart;
74857b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
74957b952d6SSatish Balay 
75057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
75157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
75257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
75357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
75457b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
75557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
756cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
757cee3aa6bSSatish Balay             col++; a += bs;
75857b952d6SSatish Balay           }
75957b952d6SSatish Balay         }
76057b952d6SSatish Balay       }
76157b952d6SSatish Balay       /* copy over the B part */
76257b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
76357b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
76457b952d6SSatish Balay       row = baij->rstart*bs;
76557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
76657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76957b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
77057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
771cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
772cee3aa6bSSatish Balay             col++; a += bs;
77357b952d6SSatish Balay           }
77457b952d6SSatish Balay         }
77557b952d6SSatish Balay       }
77657b952d6SSatish Balay       PetscFree(rvals);
7776d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7786d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
77957b952d6SSatish Balay       if (!rank) {
78057b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
78157b952d6SSatish Balay       }
78257b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
78357b952d6SSatish Balay     }
78457b952d6SSatish Balay   }
78557b952d6SSatish Balay   return 0;
78657b952d6SSatish Balay }
78757b952d6SSatish Balay 
78857b952d6SSatish Balay 
78957b952d6SSatish Balay 
7905615d1e5SSatish Balay #undef __FUNC__
7915615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
792ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
79357b952d6SSatish Balay {
79457b952d6SSatish Balay   Mat         mat = (Mat) obj;
79557b952d6SSatish Balay   int         ierr;
79657b952d6SSatish Balay   ViewerType  vtype;
79757b952d6SSatish Balay 
79857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
79957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
80057b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
80157b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
80257b952d6SSatish Balay   }
80357b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
80457b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
80557b952d6SSatish Balay   }
80657b952d6SSatish Balay   return 0;
80757b952d6SSatish Balay }
80857b952d6SSatish Balay 
8095615d1e5SSatish Balay #undef __FUNC__
8105615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
811ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
81279bdfe76SSatish Balay {
81379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
81479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
81579bdfe76SSatish Balay   int         ierr;
81679bdfe76SSatish Balay 
81779bdfe76SSatish Balay #if defined(PETSC_LOG)
81879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
81979bdfe76SSatish Balay #endif
82079bdfe76SSatish Balay 
82179bdfe76SSatish Balay   PetscFree(baij->rowners);
82279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
82379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
82479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
82579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
82679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
82779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
82879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
82930793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
83079bdfe76SSatish Balay   PetscFree(baij);
83190f02eecSBarry Smith   if (mat->mapping) {
83290f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
83390f02eecSBarry Smith   }
83479bdfe76SSatish Balay   PLogObjectDestroy(mat);
83579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
83679bdfe76SSatish Balay   return 0;
83779bdfe76SSatish Balay }
83879bdfe76SSatish Balay 
8395615d1e5SSatish Balay #undef __FUNC__
8405615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
841ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
842cee3aa6bSSatish Balay {
843cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
84447b4a8eaSLois Curfman McInnes   int         ierr, nt;
845cee3aa6bSSatish Balay 
846c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
84747b4a8eaSLois Curfman McInnes   if (nt != a->n) {
848ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
84947b4a8eaSLois Curfman McInnes   }
850c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
85147b4a8eaSLois Curfman McInnes   if (nt != a->m) {
852e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
85347b4a8eaSLois Curfman McInnes   }
85443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
855cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
85643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
857cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
85843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
859cee3aa6bSSatish Balay   return 0;
860cee3aa6bSSatish Balay }
861cee3aa6bSSatish Balay 
8625615d1e5SSatish Balay #undef __FUNC__
8635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
864ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
865cee3aa6bSSatish Balay {
866cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
867cee3aa6bSSatish Balay   int        ierr;
86843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
869cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
87043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
871cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
872cee3aa6bSSatish Balay   return 0;
873cee3aa6bSSatish Balay }
874cee3aa6bSSatish Balay 
8755615d1e5SSatish Balay #undef __FUNC__
8765615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
877ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
878cee3aa6bSSatish Balay {
879cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
880cee3aa6bSSatish Balay   int        ierr;
881cee3aa6bSSatish Balay 
882cee3aa6bSSatish Balay   /* do nondiagonal part */
883cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
884cee3aa6bSSatish Balay   /* send it on its way */
885537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
886cee3aa6bSSatish Balay   /* do local part */
887cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
888cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
889cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
890cee3aa6bSSatish Balay   /* but is not perhaps always true. */
891639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
892cee3aa6bSSatish Balay   return 0;
893cee3aa6bSSatish Balay }
894cee3aa6bSSatish Balay 
8955615d1e5SSatish Balay #undef __FUNC__
8965615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
897ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
898cee3aa6bSSatish Balay {
899cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
900cee3aa6bSSatish Balay   int        ierr;
901cee3aa6bSSatish Balay 
902cee3aa6bSSatish Balay   /* do nondiagonal part */
903cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
904cee3aa6bSSatish Balay   /* send it on its way */
905537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
906cee3aa6bSSatish Balay   /* do local part */
907cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
908cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
909cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
910cee3aa6bSSatish Balay   /* but is not perhaps always true. */
911537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
912cee3aa6bSSatish Balay   return 0;
913cee3aa6bSSatish Balay }
914cee3aa6bSSatish Balay 
915cee3aa6bSSatish Balay /*
916cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
917cee3aa6bSSatish Balay    diagonal block
918cee3aa6bSSatish Balay */
9195615d1e5SSatish Balay #undef __FUNC__
9205615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
921ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
922cee3aa6bSSatish Balay {
923cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
924cee3aa6bSSatish Balay   if (a->M != a->N)
925e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
926cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
927cee3aa6bSSatish Balay }
928cee3aa6bSSatish Balay 
9295615d1e5SSatish Balay #undef __FUNC__
9305615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
931ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
932cee3aa6bSSatish Balay {
933cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
934cee3aa6bSSatish Balay   int        ierr;
935cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
936cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
937cee3aa6bSSatish Balay   return 0;
938cee3aa6bSSatish Balay }
939026e39d0SSatish Balay 
9405615d1e5SSatish Balay #undef __FUNC__
9415615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
942ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
94357b952d6SSatish Balay {
94457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
94557b952d6SSatish Balay   *m = mat->M; *n = mat->N;
94657b952d6SSatish Balay   return 0;
94757b952d6SSatish Balay }
94857b952d6SSatish Balay 
9495615d1e5SSatish Balay #undef __FUNC__
9505615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
951ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
95257b952d6SSatish Balay {
95357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
95457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
95557b952d6SSatish Balay   return 0;
95657b952d6SSatish Balay }
95757b952d6SSatish Balay 
9585615d1e5SSatish Balay #undef __FUNC__
9595615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
960ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
96157b952d6SSatish Balay {
96257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
96457b952d6SSatish Balay   return 0;
96557b952d6SSatish Balay }
96657b952d6SSatish Balay 
967acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
968acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
969acdf5bf4SSatish Balay 
9705615d1e5SSatish Balay #undef __FUNC__
9715615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
972acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
973acdf5bf4SSatish Balay {
974acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
975acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
976acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
977d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
978d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
979acdf5bf4SSatish Balay 
980e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
981acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
982acdf5bf4SSatish Balay 
983acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
984acdf5bf4SSatish Balay     /*
985acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
986acdf5bf4SSatish Balay     */
987acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
988bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
989bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
990acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
991acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
992acdf5bf4SSatish Balay     }
993acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
994acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
995acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
996acdf5bf4SSatish Balay   }
997acdf5bf4SSatish Balay 
998acdf5bf4SSatish Balay 
999e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1000d9d09a02SSatish Balay   lrow = row - brstart;
1001acdf5bf4SSatish Balay 
1002acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1003acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1004acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1005acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1006acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1007acdf5bf4SSatish Balay   nztot = nzA + nzB;
1008acdf5bf4SSatish Balay 
1009acdf5bf4SSatish Balay   cmap  = mat->garray;
1010acdf5bf4SSatish Balay   if (v  || idx) {
1011acdf5bf4SSatish Balay     if (nztot) {
1012acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1013acdf5bf4SSatish Balay       int imark = -1;
1014acdf5bf4SSatish Balay       if (v) {
1015acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1016acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1017d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1018acdf5bf4SSatish Balay           else break;
1019acdf5bf4SSatish Balay         }
1020acdf5bf4SSatish Balay         imark = i;
1021acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1022acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1023acdf5bf4SSatish Balay       }
1024acdf5bf4SSatish Balay       if (idx) {
1025acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1026acdf5bf4SSatish Balay         if (imark > -1) {
1027acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1028bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1029acdf5bf4SSatish Balay           }
1030acdf5bf4SSatish Balay         } else {
1031acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1032d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1033d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1034acdf5bf4SSatish Balay             else break;
1035acdf5bf4SSatish Balay           }
1036acdf5bf4SSatish Balay           imark = i;
1037acdf5bf4SSatish Balay         }
1038d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1039d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1040acdf5bf4SSatish Balay       }
1041acdf5bf4SSatish Balay     }
1042d212a18eSSatish Balay     else {
1043d212a18eSSatish Balay       if (idx) *idx = 0;
1044d212a18eSSatish Balay       if (v)   *v   = 0;
1045d212a18eSSatish Balay     }
1046acdf5bf4SSatish Balay   }
1047acdf5bf4SSatish Balay   *nz = nztot;
1048acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1049acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1050acdf5bf4SSatish Balay   return 0;
1051acdf5bf4SSatish Balay }
1052acdf5bf4SSatish Balay 
10535615d1e5SSatish Balay #undef __FUNC__
10545615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1055acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1056acdf5bf4SSatish Balay {
1057acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1058acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1059e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1060acdf5bf4SSatish Balay   }
1061acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1062acdf5bf4SSatish Balay   return 0;
1063acdf5bf4SSatish Balay }
1064acdf5bf4SSatish Balay 
10655615d1e5SSatish Balay #undef __FUNC__
10665615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1067ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10685a838052SSatish Balay {
10695a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10705a838052SSatish Balay   *bs = baij->bs;
10715a838052SSatish Balay   return 0;
10725a838052SSatish Balay }
10735a838052SSatish Balay 
10745615d1e5SSatish Balay #undef __FUNC__
10755615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1076ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
107758667388SSatish Balay {
107858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
107958667388SSatish Balay   int         ierr;
108058667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
108158667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
108258667388SSatish Balay   return 0;
108358667388SSatish Balay }
10840ac07820SSatish Balay 
10855615d1e5SSatish Balay #undef __FUNC__
10865615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1087ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10880ac07820SSatish Balay {
10894e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10904e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10917d57db60SLois Curfman McInnes   int         ierr;
10927d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10930ac07820SSatish Balay 
10944e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10954e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10964e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10974e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10984e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10994e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11004e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11014e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11024e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11030ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11044e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11054e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11064e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11074e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11084e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1110dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11114e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11124e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11134e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11144e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11154e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11160ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1117dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11184e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11194e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11204e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11214e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11224e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11230ac07820SSatish Balay   }
11244e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11254e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11264e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11270ac07820SSatish Balay   return 0;
11280ac07820SSatish Balay }
11290ac07820SSatish Balay 
11305615d1e5SSatish Balay #undef __FUNC__
11315615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1132ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
113358667388SSatish Balay {
113458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
113558667388SSatish Balay 
113658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
113758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11386da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1139c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1140*96854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1141c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1142b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1143b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1144b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1145aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
114658667388SSatish Balay         MatSetOption(a->A,op);
114758667388SSatish Balay         MatSetOption(a->B,op);
1148b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11496da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
115058667388SSatish Balay              op == MAT_SYMMETRIC ||
115158667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
115258667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
115358667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
115458667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
115558667388SSatish Balay     a->roworiented = 0;
115658667388SSatish Balay     MatSetOption(a->A,op);
115758667388SSatish Balay     MatSetOption(a->B,op);
11582b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
115990f02eecSBarry Smith     a->donotstash = 1;
116090f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1161e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
116258667388SSatish Balay   else
1163e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
116458667388SSatish Balay   return 0;
116558667388SSatish Balay }
116658667388SSatish Balay 
11675615d1e5SSatish Balay #undef __FUNC__
11685615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1169ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11700ac07820SSatish Balay {
11710ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11720ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11730ac07820SSatish Balay   Mat        B;
11740ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11750ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11760ac07820SSatish Balay   Scalar     *a;
11770ac07820SSatish Balay 
11780ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1179e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11800ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11810ac07820SSatish Balay   CHKERRQ(ierr);
11820ac07820SSatish Balay 
11830ac07820SSatish Balay   /* copy over the A part */
11840ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11850ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11860ac07820SSatish Balay   row = baij->rstart;
11870ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11880ac07820SSatish Balay 
11890ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11900ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11910ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11920ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11930ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11940ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11950ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11960ac07820SSatish Balay         col++; a += bs;
11970ac07820SSatish Balay       }
11980ac07820SSatish Balay     }
11990ac07820SSatish Balay   }
12000ac07820SSatish Balay   /* copy over the B part */
12010ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12020ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12030ac07820SSatish Balay   row = baij->rstart*bs;
12040ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12050ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12060ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12070ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12080ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12090ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12100ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12110ac07820SSatish Balay         col++; a += bs;
12120ac07820SSatish Balay       }
12130ac07820SSatish Balay     }
12140ac07820SSatish Balay   }
12150ac07820SSatish Balay   PetscFree(rvals);
12160ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12170ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12180ac07820SSatish Balay 
12190ac07820SSatish Balay   if (matout != PETSC_NULL) {
12200ac07820SSatish Balay     *matout = B;
12210ac07820SSatish Balay   } else {
12220ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12230ac07820SSatish Balay     PetscFree(baij->rowners);
12240ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12250ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12260ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12270ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12280ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12290ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12300ac07820SSatish Balay     PetscFree(baij);
12310ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
12320ac07820SSatish Balay     PetscHeaderDestroy(B);
12330ac07820SSatish Balay   }
12340ac07820SSatish Balay   return 0;
12350ac07820SSatish Balay }
12360e95ebc0SSatish Balay 
12375615d1e5SSatish Balay #undef __FUNC__
12385615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12390e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12400e95ebc0SSatish Balay {
12410e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12420e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12430e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12440e95ebc0SSatish Balay 
12450e95ebc0SSatish Balay   if (ll)  {
12460e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12470e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1248e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12490e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12500e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12510e95ebc0SSatish Balay   }
1252e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12530e95ebc0SSatish Balay   return 0;
12540e95ebc0SSatish Balay }
12550e95ebc0SSatish Balay 
12560ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12570ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12580ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12590ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12600ac07820SSatish Balay    routine.
12610ac07820SSatish Balay */
12625615d1e5SSatish Balay #undef __FUNC__
12635615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1264ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12650ac07820SSatish Balay {
12660ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12670ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12680ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12690ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12700ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12710ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12720ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12730ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12740ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12750ac07820SSatish Balay   IS             istmp;
12760ac07820SSatish Balay 
12770ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12780ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12790ac07820SSatish Balay 
12800ac07820SSatish Balay   /*  first count number of contributors to each processor */
12810ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12820ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12830ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12840ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12850ac07820SSatish Balay     idx = rows[i];
12860ac07820SSatish Balay     found = 0;
12870ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12880ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12890ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12900ac07820SSatish Balay       }
12910ac07820SSatish Balay     }
1292e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12930ac07820SSatish Balay   }
12940ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12950ac07820SSatish Balay 
12960ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12970ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12980ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12990ac07820SSatish Balay   nrecvs = work[rank];
13000ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13010ac07820SSatish Balay   nmax = work[rank];
13020ac07820SSatish Balay   PetscFree(work);
13030ac07820SSatish Balay 
13040ac07820SSatish Balay   /* post receives:   */
13050ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13060ac07820SSatish Balay   CHKPTRQ(rvalues);
13070ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13080ac07820SSatish Balay   CHKPTRQ(recv_waits);
13090ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13100ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13110ac07820SSatish Balay   }
13120ac07820SSatish Balay 
13130ac07820SSatish Balay   /* do sends:
13140ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13150ac07820SSatish Balay          the ith processor
13160ac07820SSatish Balay   */
13170ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13180ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13190ac07820SSatish Balay   CHKPTRQ(send_waits);
13200ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13210ac07820SSatish Balay   starts[0] = 0;
13220ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13230ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13240ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13250ac07820SSatish Balay   }
13260ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13270ac07820SSatish Balay 
13280ac07820SSatish Balay   starts[0] = 0;
13290ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13300ac07820SSatish Balay   count = 0;
13310ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13320ac07820SSatish Balay     if (procs[i]) {
13330ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13340ac07820SSatish Balay     }
13350ac07820SSatish Balay   }
13360ac07820SSatish Balay   PetscFree(starts);
13370ac07820SSatish Balay 
13380ac07820SSatish Balay   base = owners[rank]*bs;
13390ac07820SSatish Balay 
13400ac07820SSatish Balay   /*  wait on receives */
13410ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13420ac07820SSatish Balay   source = lens + nrecvs;
13430ac07820SSatish Balay   count  = nrecvs; slen = 0;
13440ac07820SSatish Balay   while (count) {
13450ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13460ac07820SSatish Balay     /* unpack receives into our local space */
13470ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13480ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13490ac07820SSatish Balay     lens[imdex]  = n;
13500ac07820SSatish Balay     slen += n;
13510ac07820SSatish Balay     count--;
13520ac07820SSatish Balay   }
13530ac07820SSatish Balay   PetscFree(recv_waits);
13540ac07820SSatish Balay 
13550ac07820SSatish Balay   /* move the data into the send scatter */
13560ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13570ac07820SSatish Balay   count = 0;
13580ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13590ac07820SSatish Balay     values = rvalues + i*nmax;
13600ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13610ac07820SSatish Balay       lrows[count++] = values[j] - base;
13620ac07820SSatish Balay     }
13630ac07820SSatish Balay   }
13640ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13650ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13660ac07820SSatish Balay 
13670ac07820SSatish Balay   /* actually zap the local rows */
1368029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13690ac07820SSatish Balay   PLogObjectParent(A,istmp);
13700ac07820SSatish Balay   PetscFree(lrows);
13710ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13720ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13730ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13740ac07820SSatish Balay 
13750ac07820SSatish Balay   /* wait on sends */
13760ac07820SSatish Balay   if (nsends) {
13770ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13780ac07820SSatish Balay     CHKPTRQ(send_status);
13790ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13800ac07820SSatish Balay     PetscFree(send_status);
13810ac07820SSatish Balay   }
13820ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13830ac07820SSatish Balay 
13840ac07820SSatish Balay   return 0;
13850ac07820SSatish Balay }
1386ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13875615d1e5SSatish Balay #undef __FUNC__
13885615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1389ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1390ba4ca20aSSatish Balay {
1391ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1392ba4ca20aSSatish Balay 
1393ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1394ba4ca20aSSatish Balay   else return 0;
1395ba4ca20aSSatish Balay }
13960ac07820SSatish Balay 
13975615d1e5SSatish Balay #undef __FUNC__
13985615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1399ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1400bb5a7306SBarry Smith {
1401bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1402bb5a7306SBarry Smith   int         ierr;
1403bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1404bb5a7306SBarry Smith   return 0;
1405bb5a7306SBarry Smith }
1406bb5a7306SBarry Smith 
14070ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14080ac07820SSatish Balay 
140979bdfe76SSatish Balay /* -------------------------------------------------------------------*/
141079bdfe76SSatish Balay static struct _MatOps MatOps = {
1411bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14124c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14134c50302cSBarry Smith   0,0,0,0,
14140ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14150e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
141658667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14174c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14184c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14194c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
142094a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1421d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1422ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1423bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1424ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
142579bdfe76SSatish Balay 
142679bdfe76SSatish Balay 
14275615d1e5SSatish Balay #undef __FUNC__
14285615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
142979bdfe76SSatish Balay /*@C
143079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
143179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
143279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
143379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
143479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
143579bdfe76SSatish Balay 
143679bdfe76SSatish Balay    Input Parameters:
143779bdfe76SSatish Balay .  comm - MPI communicator
143879bdfe76SSatish Balay .  bs   - size of blockk
143979bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
144092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
144192e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
144292e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
144392e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
144492e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
144579bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
144692e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
144779bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
144879bdfe76SSatish Balay            submatrix  (same for all local rows)
144992e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
145092e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
145192e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
145292e8d321SLois Curfman McInnes            it is zero.
145392e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
145479bdfe76SSatish Balay            submatrix (same for all local rows).
145592e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
145692e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
145792e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
145879bdfe76SSatish Balay 
145979bdfe76SSatish Balay    Output Parameter:
146079bdfe76SSatish Balay .  A - the matrix
146179bdfe76SSatish Balay 
146279bdfe76SSatish Balay    Notes:
146379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
146479bdfe76SSatish Balay    (possibly both).
146579bdfe76SSatish Balay 
146679bdfe76SSatish Balay    Storage Information:
146779bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
146879bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
146979bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
147079bdfe76SSatish Balay    local matrix (a rectangular submatrix).
147179bdfe76SSatish Balay 
147279bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
147379bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
147479bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
147579bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
147679bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
147779bdfe76SSatish Balay 
147879bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
147979bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
148079bdfe76SSatish Balay 
148179bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
148279bdfe76SSatish Balay $         -------------------
148379bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
148479bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
148579bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
148679bdfe76SSatish Balay $         -------------------
148779bdfe76SSatish Balay $
148879bdfe76SSatish Balay 
148979bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
149079bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
149179bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
149257b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
149379bdfe76SSatish Balay 
149479bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
149579bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
149679bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
149792e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
149892e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14996da5968aSLois Curfman McInnes    matrices.
150079bdfe76SSatish Balay 
150192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
150279bdfe76SSatish Balay 
150379bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
150479bdfe76SSatish Balay @*/
150579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
150679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
150779bdfe76SSatish Balay {
150879bdfe76SSatish Balay   Mat          B;
150979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15104c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
151179bdfe76SSatish Balay 
1512e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
151379bdfe76SSatish Balay   *A = 0;
151479bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
151579bdfe76SSatish Balay   PLogObjectCreate(B);
151679bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
151779bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
151879bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15194c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15204c50302cSBarry Smith   if (size == 1) {
15214c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15224c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15234c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15244c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15254c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15264c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15274c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15284c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15294c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15304c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15314c50302cSBarry Smith   }
15324c50302cSBarry Smith 
153379bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
153479bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
153590f02eecSBarry Smith   B->mapping    = 0;
153679bdfe76SSatish Balay   B->factor     = 0;
153779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
153879bdfe76SSatish Balay 
1539e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
154079bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
154179bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
154279bdfe76SSatish Balay 
154379bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1544e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1545e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1546e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1547cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1548cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
154979bdfe76SSatish Balay 
155079bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
155179bdfe76SSatish Balay     work[0] = m; work[1] = n;
155279bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
155379bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
155479bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
155579bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
155679bdfe76SSatish Balay   }
155779bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
155879bdfe76SSatish Balay     Mbs = M/bs;
1559e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
156079bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
156179bdfe76SSatish Balay     m   = mbs*bs;
156279bdfe76SSatish Balay   }
156379bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
156479bdfe76SSatish Balay     Nbs = N/bs;
1565e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
156679bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
156779bdfe76SSatish Balay     n   = nbs*bs;
156879bdfe76SSatish Balay   }
1569e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
157079bdfe76SSatish Balay 
157179bdfe76SSatish Balay   b->m = m; B->m = m;
157279bdfe76SSatish Balay   b->n = n; B->n = n;
157379bdfe76SSatish Balay   b->N = N; B->N = N;
157479bdfe76SSatish Balay   b->M = M; B->M = M;
157579bdfe76SSatish Balay   b->bs  = bs;
157679bdfe76SSatish Balay   b->bs2 = bs*bs;
157779bdfe76SSatish Balay   b->mbs = mbs;
157879bdfe76SSatish Balay   b->nbs = nbs;
157979bdfe76SSatish Balay   b->Mbs = Mbs;
158079bdfe76SSatish Balay   b->Nbs = Nbs;
158179bdfe76SSatish Balay 
158279bdfe76SSatish Balay   /* build local table of row and column ownerships */
158379bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
158479bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15850ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
158679bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
158779bdfe76SSatish Balay   b->rowners[0] = 0;
158879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
158979bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
159079bdfe76SSatish Balay   }
159179bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
159279bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15934fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15944fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15954fa0d573SSatish Balay 
159679bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
159779bdfe76SSatish Balay   b->cowners[0] = 0;
159879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
159979bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
160079bdfe76SSatish Balay   }
160179bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
160279bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16034fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16044fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
160579bdfe76SSatish Balay 
160679bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1607029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
160879bdfe76SSatish Balay   PLogObjectParent(B,b->A);
160979bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1610029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
161179bdfe76SSatish Balay   PLogObjectParent(B,b->B);
161279bdfe76SSatish Balay 
161379bdfe76SSatish Balay   /* build cache for off array entries formed */
161479bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
161590f02eecSBarry Smith   b->donotstash  = 0;
161679bdfe76SSatish Balay   b->colmap      = 0;
161779bdfe76SSatish Balay   b->garray      = 0;
161879bdfe76SSatish Balay   b->roworiented = 1;
161979bdfe76SSatish Balay 
162030793edcSSatish Balay   /* stuff used in block assembly */
162130793edcSSatish Balay   b->barray      = 0;
162230793edcSSatish Balay 
162379bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
162479bdfe76SSatish Balay   b->lvec        = 0;
162579bdfe76SSatish Balay   b->Mvctx       = 0;
162679bdfe76SSatish Balay 
162779bdfe76SSatish Balay   /* stuff for MatGetRow() */
162879bdfe76SSatish Balay   b->rowindices   = 0;
162979bdfe76SSatish Balay   b->rowvalues    = 0;
163079bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
163179bdfe76SSatish Balay 
163279bdfe76SSatish Balay   *A = B;
163379bdfe76SSatish Balay   return 0;
163479bdfe76SSatish Balay }
1635026e39d0SSatish Balay 
16365615d1e5SSatish Balay #undef __FUNC__
16375615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16380ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16390ac07820SSatish Balay {
16400ac07820SSatish Balay   Mat         mat;
16410ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16420ac07820SSatish Balay   int         ierr, len=0, flg;
16430ac07820SSatish Balay 
16440ac07820SSatish Balay   *newmat       = 0;
16450ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16460ac07820SSatish Balay   PLogObjectCreate(mat);
16470ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16480ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16490ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16500ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16510ac07820SSatish Balay   mat->factor     = matin->factor;
16520ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16530ac07820SSatish Balay 
16540ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16550ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16560ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16570ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16580ac07820SSatish Balay 
16590ac07820SSatish Balay   a->bs  = oldmat->bs;
16600ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16610ac07820SSatish Balay   a->mbs = oldmat->mbs;
16620ac07820SSatish Balay   a->nbs = oldmat->nbs;
16630ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16640ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16650ac07820SSatish Balay 
16660ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16670ac07820SSatish Balay   a->rend         = oldmat->rend;
16680ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16690ac07820SSatish Balay   a->cend         = oldmat->cend;
16700ac07820SSatish Balay   a->size         = oldmat->size;
16710ac07820SSatish Balay   a->rank         = oldmat->rank;
1672e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16730ac07820SSatish Balay   a->rowvalues    = 0;
16740ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
167530793edcSSatish Balay   a->barray       = 0;
16760ac07820SSatish Balay 
16770ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
16780ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
16790ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16800ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16810ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16820ac07820SSatish Balay   if (oldmat->colmap) {
16830ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16840ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16850ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16860ac07820SSatish Balay   } else a->colmap = 0;
16870ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16880ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16890ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16900ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16910ac07820SSatish Balay   } else a->garray = 0;
16920ac07820SSatish Balay 
16930ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16940ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16950ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16960ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16970ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16980ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16990ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17000ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17010ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17020ac07820SSatish Balay   if (flg) {
17030ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17040ac07820SSatish Balay   }
17050ac07820SSatish Balay   *newmat = mat;
17060ac07820SSatish Balay   return 0;
17070ac07820SSatish Balay }
170857b952d6SSatish Balay 
170957b952d6SSatish Balay #include "sys.h"
171057b952d6SSatish Balay 
17115615d1e5SSatish Balay #undef __FUNC__
17125615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
171357b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
171457b952d6SSatish Balay {
171557b952d6SSatish Balay   Mat          A;
171657b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
171757b952d6SSatish Balay   Scalar       *vals,*buf;
171857b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
171957b952d6SSatish Balay   MPI_Status   status;
1720cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
172157b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
172257b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
172357b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
172457b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
172557b952d6SSatish Balay 
172657b952d6SSatish Balay 
172757b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
172857b952d6SSatish Balay   bs2  = bs*bs;
172957b952d6SSatish Balay 
173057b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
173157b952d6SSatish Balay   if (!rank) {
173257b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
173357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1734e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
173557b952d6SSatish Balay   }
173657b952d6SSatish Balay 
173757b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
173857b952d6SSatish Balay   M = header[1]; N = header[2];
173957b952d6SSatish Balay 
1740e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
174157b952d6SSatish Balay 
174257b952d6SSatish Balay   /*
174357b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
174457b952d6SSatish Balay      divisible by the blocksize
174557b952d6SSatish Balay   */
174657b952d6SSatish Balay   Mbs        = M/bs;
174757b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
174857b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
174957b952d6SSatish Balay   else                  Mbs++;
175057b952d6SSatish Balay   if (extra_rows &&!rank) {
1751b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
175257b952d6SSatish Balay   }
1753537820f0SBarry Smith 
175457b952d6SSatish Balay   /* determine ownership of all rows */
175557b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
175657b952d6SSatish Balay   m   = mbs * bs;
1757cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1758cee3aa6bSSatish Balay   browners = rowners + size + 1;
175957b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
176057b952d6SSatish Balay   rowners[0] = 0;
1761cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1762cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
176357b952d6SSatish Balay   rstart = rowners[rank];
176457b952d6SSatish Balay   rend   = rowners[rank+1];
176557b952d6SSatish Balay 
176657b952d6SSatish Balay   /* distribute row lengths to all processors */
176757b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
176857b952d6SSatish Balay   if (!rank) {
176957b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
177057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
177157b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
177257b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1773cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1774cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
177557b952d6SSatish Balay     PetscFree(sndcounts);
177657b952d6SSatish Balay   }
177757b952d6SSatish Balay   else {
177857b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
177957b952d6SSatish Balay   }
178057b952d6SSatish Balay 
178157b952d6SSatish Balay   if (!rank) {
178257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
178357b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
178457b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
178557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
178657b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
178757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
178857b952d6SSatish Balay       }
178957b952d6SSatish Balay     }
179057b952d6SSatish Balay     PetscFree(rowlengths);
179157b952d6SSatish Balay 
179257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
179357b952d6SSatish Balay     maxnz = 0;
179457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
179557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
179657b952d6SSatish Balay     }
179757b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
179857b952d6SSatish Balay 
179957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
180057b952d6SSatish Balay     nz = procsnz[0];
180157b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
180257b952d6SSatish Balay     mycols = ibuf;
1803cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
180457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1805cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1806cee3aa6bSSatish Balay 
180757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
180857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
180957b952d6SSatish Balay       nz = procsnz[i];
181057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
181157b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
181257b952d6SSatish Balay     }
181357b952d6SSatish Balay     /* read in the stuff for the last proc */
181457b952d6SSatish Balay     if ( size != 1 ) {
181557b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
181657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
181757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1818cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
181957b952d6SSatish Balay     }
182057b952d6SSatish Balay     PetscFree(cols);
182157b952d6SSatish Balay   }
182257b952d6SSatish Balay   else {
182357b952d6SSatish Balay     /* determine buffer space needed for message */
182457b952d6SSatish Balay     nz = 0;
182557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
182657b952d6SSatish Balay       nz += locrowlens[i];
182757b952d6SSatish Balay     }
182857b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
182957b952d6SSatish Balay     mycols = ibuf;
183057b952d6SSatish Balay     /* receive message of column indices*/
183157b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
183257b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1833e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
183457b952d6SSatish Balay   }
183557b952d6SSatish Balay 
183657b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1837cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1838cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
183957b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1840cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
184157b952d6SSatish Balay   masked1 = mask    + Mbs;
184257b952d6SSatish Balay   masked2 = masked1 + Mbs;
184357b952d6SSatish Balay   rowcount = 0; nzcount = 0;
184457b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
184557b952d6SSatish Balay     dcount  = 0;
184657b952d6SSatish Balay     odcount = 0;
184757b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
184857b952d6SSatish Balay       kmax = locrowlens[rowcount];
184957b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
185057b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
185157b952d6SSatish Balay         if (!mask[tmp]) {
185257b952d6SSatish Balay           mask[tmp] = 1;
185357b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
185457b952d6SSatish Balay           else masked1[dcount++] = tmp;
185557b952d6SSatish Balay         }
185657b952d6SSatish Balay       }
185757b952d6SSatish Balay       rowcount++;
185857b952d6SSatish Balay     }
1859cee3aa6bSSatish Balay 
186057b952d6SSatish Balay     dlens[i]  = dcount;
186157b952d6SSatish Balay     odlens[i] = odcount;
1862cee3aa6bSSatish Balay 
186357b952d6SSatish Balay     /* zero out the mask elements we set */
186457b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
186557b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
186657b952d6SSatish Balay   }
1867cee3aa6bSSatish Balay 
186857b952d6SSatish Balay   /* create our matrix */
1869537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1870537820f0SBarry Smith          CHKERRQ(ierr);
187157b952d6SSatish Balay   A = *newmat;
18726d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
187357b952d6SSatish Balay 
187457b952d6SSatish Balay   if (!rank) {
187557b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
187657b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
187757b952d6SSatish Balay     nz = procsnz[0];
187857b952d6SSatish Balay     vals = buf;
1879cee3aa6bSSatish Balay     mycols = ibuf;
1880cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
188157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1882cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1883537820f0SBarry Smith 
188457b952d6SSatish Balay     /* insert into matrix */
188557b952d6SSatish Balay     jj      = rstart*bs;
188657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
188757b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
188857b952d6SSatish Balay       mycols += locrowlens[i];
188957b952d6SSatish Balay       vals   += locrowlens[i];
189057b952d6SSatish Balay       jj++;
189157b952d6SSatish Balay     }
189257b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
189357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
189457b952d6SSatish Balay       nz = procsnz[i];
189557b952d6SSatish Balay       vals = buf;
189657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
189757b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
189857b952d6SSatish Balay     }
189957b952d6SSatish Balay     /* the last proc */
190057b952d6SSatish Balay     if ( size != 1 ){
190157b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1902cee3aa6bSSatish Balay       vals = buf;
190357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
190457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1905cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
190657b952d6SSatish Balay     }
190757b952d6SSatish Balay     PetscFree(procsnz);
190857b952d6SSatish Balay   }
190957b952d6SSatish Balay   else {
191057b952d6SSatish Balay     /* receive numeric values */
191157b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
191257b952d6SSatish Balay 
191357b952d6SSatish Balay     /* receive message of values*/
191457b952d6SSatish Balay     vals = buf;
1915cee3aa6bSSatish Balay     mycols = ibuf;
191657b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
191757b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1918e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
191957b952d6SSatish Balay 
192057b952d6SSatish Balay     /* insert into matrix */
192157b952d6SSatish Balay     jj      = rstart*bs;
1922cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
192357b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
192457b952d6SSatish Balay       mycols += locrowlens[i];
192557b952d6SSatish Balay       vals   += locrowlens[i];
192657b952d6SSatish Balay       jj++;
192757b952d6SSatish Balay     }
192857b952d6SSatish Balay   }
192957b952d6SSatish Balay   PetscFree(locrowlens);
193057b952d6SSatish Balay   PetscFree(buf);
193157b952d6SSatish Balay   PetscFree(ibuf);
193257b952d6SSatish Balay   PetscFree(rowners);
193357b952d6SSatish Balay   PetscFree(dlens);
1934cee3aa6bSSatish Balay   PetscFree(mask);
19356d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19366d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
193757b952d6SSatish Balay   return 0;
193857b952d6SSatish Balay }
193957b952d6SSatish Balay 
194057b952d6SSatish Balay 
1941