xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2b36279985d181bf1ae28d424577ae2c2fbebdf4)
179bdfe76SSatish Balay #ifndef lint
2*2b362799SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.63 1997/04/01 14:34:09 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
3857b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
95ac7a638eSSatish Balay       if (a->nonew) goto a_noinsert; \
9680c1aa95SSatish Balay       if (nrow >= rmax) { \
9780c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9880c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9980c1aa95SSatish Balay         Scalar *new_a; \
10080c1aa95SSatish Balay  \
10180c1aa95SSatish Balay         /* malloc new storage space */ \
10280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10680c1aa95SSatish Balay  \
10780c1aa95SSatish Balay         /* copy over old data into new slots */ \
10880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11380c1aa95SSatish Balay                                                            len*sizeof(int)); \
11480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11880c1aa95SSatish Balay         /* free up old matrix storage */ \
11980c1aa95SSatish Balay         PetscFree(a->a);  \
12080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12280c1aa95SSatish Balay         a->singlemalloc = 1; \
12380c1aa95SSatish Balay  \
12480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
125ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12880c1aa95SSatish Balay         a->reallocs++; \
12980c1aa95SSatish Balay         a->nz++; \
13080c1aa95SSatish Balay       } \
13180c1aa95SSatish Balay       N = nrow++ - 1;  \
13280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13680c1aa95SSatish Balay       } \
13780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
140ac7a638eSSatish Balay       a_noinsert:; \
14180c1aa95SSatish Balay     ailen[brow] = nrow; \
14280c1aa95SSatish Balay }
14357b952d6SSatish Balay 
144ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
145ac7a638eSSatish Balay { \
146ac7a638eSSatish Balay  \
147ac7a638eSSatish Balay     brow = row/bs;  \
148ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
149ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
150ac7a638eSSatish Balay       bcol = col/bs; \
151ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
152ac7a638eSSatish Balay       low = 0; high = nrow; \
153ac7a638eSSatish Balay       while (high-low > 3) { \
154ac7a638eSSatish Balay         t = (low+high)/2; \
155ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
156ac7a638eSSatish Balay         else              low  = t; \
157ac7a638eSSatish Balay       } \
158ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
159ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
160ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
161ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
162ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
163ac7a638eSSatish Balay           else                    *bap  = value;  \
164ac7a638eSSatish Balay           goto b_noinsert; \
165ac7a638eSSatish Balay         } \
166ac7a638eSSatish Balay       } \
16774c639caSSatish Balay       if (b->nonew) goto b_noinsert; \
168ac7a638eSSatish Balay       if (nrow >= rmax) { \
169ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17074c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
171ac7a638eSSatish Balay         Scalar *new_a; \
172ac7a638eSSatish Balay  \
173ac7a638eSSatish Balay         /* malloc new storage space */ \
17474c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
175ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
176ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
177ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
178ac7a638eSSatish Balay  \
179ac7a638eSSatish Balay         /* copy over old data into new slots */ \
180ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18174c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
182ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
183ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
184ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
185ac7a638eSSatish Balay                                                            len*sizeof(int)); \
186ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
187ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
188ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
189ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
190ac7a638eSSatish Balay         /* free up old matrix storage */ \
19174c639caSSatish Balay         PetscFree(b->a);  \
19274c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19374c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
19474c639caSSatish Balay         b->singlemalloc = 1; \
195ac7a638eSSatish Balay  \
196ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
197ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
19874c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
19974c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20074c639caSSatish Balay         b->reallocs++; \
20174c639caSSatish Balay         b->nz++; \
202ac7a638eSSatish Balay       } \
203ac7a638eSSatish Balay       N = nrow++ - 1;  \
204ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
205ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
206ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
207ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
210ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
211ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
212ac7a638eSSatish Balay       b_noinsert:; \
213ac7a638eSSatish Balay     bilen[brow] = nrow; \
214ac7a638eSSatish Balay }
215ac7a638eSSatish Balay 
2165615d1e5SSatish Balay #undef __FUNC__
2175615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
218ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
21957b952d6SSatish Balay {
22057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22157b952d6SSatish Balay   Scalar      value;
2224fa0d573SSatish Balay   int         ierr,i,j,row,col;
2234fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2244fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2254fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
22657b952d6SSatish Balay 
227eada6651SSatish Balay   /* Some Variables required in the macro */
22880c1aa95SSatish Balay   Mat         A = baij->A;
22980c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
230ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
231ac7a638eSSatish Balay   Scalar      *aa=a->a;
232ac7a638eSSatish Balay 
233ac7a638eSSatish Balay   Mat         B = baij->B;
234ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
235ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
236ac7a638eSSatish Balay   Scalar      *ba=b->a;
237ac7a638eSSatish Balay 
238ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
239ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
240ac7a638eSSatish Balay   Scalar      *ap,*bap;
24180c1aa95SSatish Balay 
24257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
243639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
244e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
245e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
246639f9d9dSBarry Smith #endif
24757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
24857b952d6SSatish Balay       row = im[i] - rstart_orig;
24957b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25157b952d6SSatish Balay           col = in[j] - cstart_orig;
25257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
253f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
25480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25557b952d6SSatish Balay         }
256639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
257e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
258e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
259639f9d9dSBarry Smith #endif
26057b952d6SSatish Balay         else {
26157b952d6SSatish Balay           if (mat->was_assembled) {
262905e6a2fSBarry Smith             if (!baij->colmap) {
263905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
264905e6a2fSBarry Smith             }
265905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
26657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
26757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26857b952d6SSatish Balay               col =  in[j];
26957b952d6SSatish Balay             }
27057b952d6SSatish Balay           }
27157b952d6SSatish Balay           else col = in[j];
27257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
274ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27557b952d6SSatish Balay         }
27657b952d6SSatish Balay       }
27757b952d6SSatish Balay     }
27857b952d6SSatish Balay     else {
27990f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
28057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
28157b952d6SSatish Balay       }
28257b952d6SSatish Balay       else {
28390f02eecSBarry Smith         if (!baij->donotstash) {
28457b952d6SSatish Balay           row = im[i];
28557b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
28657b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
28757b952d6SSatish Balay           }
28857b952d6SSatish Balay         }
28957b952d6SSatish Balay       }
29057b952d6SSatish Balay     }
29190f02eecSBarry Smith   }
29257b952d6SSatish Balay   return 0;
29357b952d6SSatish Balay }
29457b952d6SSatish Balay 
295ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
296ab26458aSBarry Smith #undef __FUNC__
297ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
298ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
299ab26458aSBarry Smith {
300ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30130793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
302abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
303ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
304ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
305ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
306ab26458aSBarry Smith 
30730793edcSSatish Balay 
30830793edcSSatish Balay   if(!barray) {
30930793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
31030793edcSSatish Balay     baij->barray = barray;
31130793edcSSatish Balay   }
31230793edcSSatish Balay 
313ab26458aSBarry Smith   if (roworiented) {
314ab26458aSBarry Smith     stepval = (n-1)*bs;
315ab26458aSBarry Smith   } else {
316ab26458aSBarry Smith     stepval = (m-1)*bs;
317ab26458aSBarry Smith   }
318ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
319ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
320ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
321ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
322ab26458aSBarry Smith #endif
323ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
324ab26458aSBarry Smith       row = im[i] - rstart;
325ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
32615b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
32715b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
32815b57d14SSatish Balay           barray = v + i*bs2;
32915b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
33015b57d14SSatish Balay           barray = v + j*bs2;
33115b57d14SSatish Balay         } else { /* Here a copy is required */
332ab26458aSBarry Smith           if (roworiented) {
333ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
334ab26458aSBarry Smith           } else {
335ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
336abef11f7SSatish Balay           }
337ab26458aSBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval )
338ab26458aSBarry Smith             for (jj=0; jj<bs; jj++ )
33930793edcSSatish Balay               *barray++  = *value++;
34030793edcSSatish Balay           barray -=bs2;
34115b57d14SSatish Balay         }
342abef11f7SSatish Balay 
343abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
344abef11f7SSatish Balay           col = in[j] - cstart;
34530793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346ab26458aSBarry Smith         }
347ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
348ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
349ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
350ab26458aSBarry Smith #endif
351ab26458aSBarry Smith         else {
352ab26458aSBarry Smith           if (mat->was_assembled) {
353ab26458aSBarry Smith             if (!baij->colmap) {
354ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
355ab26458aSBarry Smith             }
356ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
357ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
359ab26458aSBarry Smith               col =  in[j];
360ab26458aSBarry Smith             }
361ab26458aSBarry Smith           }
362ab26458aSBarry Smith           else col = in[j];
36330793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364ab26458aSBarry Smith         }
365ab26458aSBarry Smith       }
366ab26458aSBarry Smith     }
367ab26458aSBarry Smith     else {
368ab26458aSBarry Smith       if (!baij->donotstash) {
369ab26458aSBarry Smith         if (roworiented ) {
370abef11f7SSatish Balay           row   = im[i]*bs;
371abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
372abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
373abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
374abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
375abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
376abef11f7SSatish Balay               }
377ab26458aSBarry Smith             }
378ab26458aSBarry Smith           }
379ab26458aSBarry Smith         }
380ab26458aSBarry Smith         else {
381ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
382abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
383abef11f7SSatish Balay             col   = in[j]*bs;
384abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
385abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
386abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
387ab26458aSBarry Smith               }
388ab26458aSBarry Smith             }
389ab26458aSBarry Smith           }
390abef11f7SSatish Balay 
391abef11f7SSatish Balay         }
392abef11f7SSatish Balay       }
393ab26458aSBarry Smith     }
394ab26458aSBarry Smith   }
395ab26458aSBarry Smith   return 0;
396ab26458aSBarry Smith }
397ab26458aSBarry Smith 
3985615d1e5SSatish Balay #undef __FUNC__
3995615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
400ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
401d6de1c52SSatish Balay {
402d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
403d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
404d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
405d6de1c52SSatish Balay 
406d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
407e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
408e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
409d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
410d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
411d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
412e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
413e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
414d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
415d6de1c52SSatish Balay           col = idxn[j] - bscstart;
416d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
417d6de1c52SSatish Balay         }
418d6de1c52SSatish Balay         else {
419905e6a2fSBarry Smith           if (!baij->colmap) {
420905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421905e6a2fSBarry Smith           }
422e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
423dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
424d9d09a02SSatish Balay           else {
425dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
426d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
427d6de1c52SSatish Balay           }
428d6de1c52SSatish Balay         }
429d6de1c52SSatish Balay       }
430d9d09a02SSatish Balay     }
431d6de1c52SSatish Balay     else {
432e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
433d6de1c52SSatish Balay     }
434d6de1c52SSatish Balay   }
435d6de1c52SSatish Balay   return 0;
436d6de1c52SSatish Balay }
437d6de1c52SSatish Balay 
4385615d1e5SSatish Balay #undef __FUNC__
4395615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
440ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
441d6de1c52SSatish Balay {
442d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
443d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
444acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
445d6de1c52SSatish Balay   double     sum = 0.0;
446d6de1c52SSatish Balay   Scalar     *v;
447d6de1c52SSatish Balay 
448d6de1c52SSatish Balay   if (baij->size == 1) {
449d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
450d6de1c52SSatish Balay   } else {
451d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
452d6de1c52SSatish Balay       v = amat->a;
453d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
454d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
455d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
456d6de1c52SSatish Balay #else
457d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
458d6de1c52SSatish Balay #endif
459d6de1c52SSatish Balay       }
460d6de1c52SSatish Balay       v = bmat->a;
461d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
462d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
463d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
464d6de1c52SSatish Balay #else
465d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
466d6de1c52SSatish Balay #endif
467d6de1c52SSatish Balay       }
468d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
469d6de1c52SSatish Balay       *norm = sqrt(*norm);
470d6de1c52SSatish Balay     }
471acdf5bf4SSatish Balay     else
472e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
473d6de1c52SSatish Balay   }
474d6de1c52SSatish Balay   return 0;
475d6de1c52SSatish Balay }
47657b952d6SSatish Balay 
4775615d1e5SSatish Balay #undef __FUNC__
4785615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
479ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
48057b952d6SSatish Balay {
48157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
48257b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
48357b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
48457b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
48557b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
48657b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
48757b952d6SSatish Balay   InsertMode  addv;
48857b952d6SSatish Balay   Scalar      *rvalues,*svalues;
48957b952d6SSatish Balay 
49057b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
491e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
49257b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
493e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
49457b952d6SSatish Balay   }
495e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
49657b952d6SSatish Balay 
49757b952d6SSatish Balay   /*  first count number of contributors to each processor */
49857b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
49957b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
50057b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
50157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
50257b952d6SSatish Balay     idx = baij->stash.idx[i];
50357b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
50457b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
50557b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
50657b952d6SSatish Balay       }
50757b952d6SSatish Balay     }
50857b952d6SSatish Balay   }
50957b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
51057b952d6SSatish Balay 
51157b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
51257b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
51357b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
51457b952d6SSatish Balay   nreceives = work[rank];
51557b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
51657b952d6SSatish Balay   nmax = work[rank];
51757b952d6SSatish Balay   PetscFree(work);
51857b952d6SSatish Balay 
51957b952d6SSatish Balay   /* post receives:
52057b952d6SSatish Balay        1) each message will consist of ordered pairs
52157b952d6SSatish Balay      (global index,value) we store the global index as a double
52257b952d6SSatish Balay      to simplify the message passing.
52357b952d6SSatish Balay        2) since we don't know how long each individual message is we
52457b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
52557b952d6SSatish Balay      this is a lot of wasted space.
52657b952d6SSatish Balay 
52757b952d6SSatish Balay 
52857b952d6SSatish Balay        This could be done better.
52957b952d6SSatish Balay   */
53057b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
53157b952d6SSatish Balay   CHKPTRQ(rvalues);
53257b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
53357b952d6SSatish Balay   CHKPTRQ(recv_waits);
53457b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
53557b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
53657b952d6SSatish Balay               comm,recv_waits+i);
53757b952d6SSatish Balay   }
53857b952d6SSatish Balay 
53957b952d6SSatish Balay   /* do sends:
54057b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
54157b952d6SSatish Balay          the ith processor
54257b952d6SSatish Balay   */
54357b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
54457b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
54557b952d6SSatish Balay   CHKPTRQ(send_waits);
54657b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
54757b952d6SSatish Balay   starts[0] = 0;
54857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
54957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
55057b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
55157b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
55257b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
55357b952d6SSatish Balay   }
55457b952d6SSatish Balay   PetscFree(owner);
55557b952d6SSatish Balay   starts[0] = 0;
55657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
55757b952d6SSatish Balay   count = 0;
55857b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
55957b952d6SSatish Balay     if (procs[i]) {
56057b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
56157b952d6SSatish Balay                 comm,send_waits+count++);
56257b952d6SSatish Balay     }
56357b952d6SSatish Balay   }
56457b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
56557b952d6SSatish Balay 
56657b952d6SSatish Balay   /* Free cache space */
567d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
56857b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
56957b952d6SSatish Balay 
57057b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
57157b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
57257b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
57357b952d6SSatish Balay   baij->rmax       = nmax;
57457b952d6SSatish Balay 
57557b952d6SSatish Balay   return 0;
57657b952d6SSatish Balay }
57757b952d6SSatish Balay 
57857b952d6SSatish Balay 
5795615d1e5SSatish Balay #undef __FUNC__
5805615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
581ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
58257b952d6SSatish Balay {
58357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
58457b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
58557b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
58657b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
58757b952d6SSatish Balay   Scalar      *values,val;
588e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   /*  wait on receives */
59157b952d6SSatish Balay   while (count) {
59257b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
59357b952d6SSatish Balay     /* unpack receives into our local space */
59457b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
59557b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
59657b952d6SSatish Balay     n = n/3;
59757b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
59857b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
59957b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
60057b952d6SSatish Balay       val = values[3*i+2];
60157b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
60257b952d6SSatish Balay         col -= baij->cstart*bs;
60357b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
60457b952d6SSatish Balay       }
60557b952d6SSatish Balay       else {
60657b952d6SSatish Balay         if (mat->was_assembled) {
607905e6a2fSBarry Smith           if (!baij->colmap) {
608905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
609905e6a2fSBarry Smith           }
610905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
61157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
61257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
61357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
61457b952d6SSatish Balay           }
61557b952d6SSatish Balay         }
61657b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
61757b952d6SSatish Balay       }
61857b952d6SSatish Balay     }
61957b952d6SSatish Balay     count--;
62057b952d6SSatish Balay   }
62157b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
62257b952d6SSatish Balay 
62357b952d6SSatish Balay   /* wait on sends */
62457b952d6SSatish Balay   if (baij->nsends) {
62557b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
62657b952d6SSatish Balay     CHKPTRQ(send_status);
62757b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
62857b952d6SSatish Balay     PetscFree(send_status);
62957b952d6SSatish Balay   }
63057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
63157b952d6SSatish Balay 
63257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
63357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
63457b952d6SSatish Balay 
63557b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
63657b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
63757b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
63857b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
63957b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
64057b952d6SSatish Balay   }
64157b952d6SSatish Balay 
6426d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
64357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
64457b952d6SSatish Balay   }
64557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
64657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
64757b952d6SSatish Balay 
64857b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
64957b952d6SSatish Balay   return 0;
65057b952d6SSatish Balay }
65157b952d6SSatish Balay 
6525615d1e5SSatish Balay #undef __FUNC__
6535615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
65457b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
65557b952d6SSatish Balay {
65657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
65757b952d6SSatish Balay   int          ierr;
65857b952d6SSatish Balay 
65957b952d6SSatish Balay   if (baij->size == 1) {
66057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
66157b952d6SSatish Balay   }
662e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
66357b952d6SSatish Balay   return 0;
66457b952d6SSatish Balay }
66557b952d6SSatish Balay 
6665615d1e5SSatish Balay #undef __FUNC__
6675615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
66857b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
66957b952d6SSatish Balay {
67057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
671cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
67257b952d6SSatish Balay   FILE         *fd;
67357b952d6SSatish Balay   ViewerType   vtype;
67457b952d6SSatish Balay 
67557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
67657b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
67757b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
678639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6794e220ebcSLois Curfman McInnes       MatInfo info;
68057b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
68157b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6824e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
68357b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
68457b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6854e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6864e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6874e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6884e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6894e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6904e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
69157b952d6SSatish Balay       fflush(fd);
69257b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
69357b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
69457b952d6SSatish Balay       return 0;
69557b952d6SSatish Balay     }
696639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
697bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
69857b952d6SSatish Balay       return 0;
69957b952d6SSatish Balay     }
70057b952d6SSatish Balay   }
70157b952d6SSatish Balay 
70257b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
70357b952d6SSatish Balay     Draw       draw;
70457b952d6SSatish Balay     PetscTruth isnull;
70557b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
70657b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
70757b952d6SSatish Balay   }
70857b952d6SSatish Balay 
70957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
71057b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
71157b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
71257b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
71357b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
71457b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
71557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
71657b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
71757b952d6SSatish Balay     fflush(fd);
71857b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
71957b952d6SSatish Balay   }
72057b952d6SSatish Balay   else {
72157b952d6SSatish Balay     int size = baij->size;
72257b952d6SSatish Balay     rank = baij->rank;
72357b952d6SSatish Balay     if (size == 1) {
72457b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
72557b952d6SSatish Balay     }
72657b952d6SSatish Balay     else {
72757b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
72857b952d6SSatish Balay       Mat         A;
72957b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
73057b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
73157b952d6SSatish Balay       int         mbs=baij->mbs;
73257b952d6SSatish Balay       Scalar      *a;
73357b952d6SSatish Balay 
73457b952d6SSatish Balay       if (!rank) {
735cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
73657b952d6SSatish Balay         CHKERRQ(ierr);
73757b952d6SSatish Balay       }
73857b952d6SSatish Balay       else {
739cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
74057b952d6SSatish Balay         CHKERRQ(ierr);
74157b952d6SSatish Balay       }
74257b952d6SSatish Balay       PLogObjectParent(mat,A);
74357b952d6SSatish Balay 
74457b952d6SSatish Balay       /* copy over the A part */
74557b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
74657b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
74757b952d6SSatish Balay       row = baij->rstart;
74857b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
74957b952d6SSatish Balay 
75057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
75157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
75257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
75357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
75457b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
75557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
756cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
757cee3aa6bSSatish Balay             col++; a += bs;
75857b952d6SSatish Balay           }
75957b952d6SSatish Balay         }
76057b952d6SSatish Balay       }
76157b952d6SSatish Balay       /* copy over the B part */
76257b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
76357b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
76457b952d6SSatish Balay       row = baij->rstart*bs;
76557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
76657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76957b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
77057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
771cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
772cee3aa6bSSatish Balay             col++; a += bs;
77357b952d6SSatish Balay           }
77457b952d6SSatish Balay         }
77557b952d6SSatish Balay       }
77657b952d6SSatish Balay       PetscFree(rvals);
7776d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7786d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
77957b952d6SSatish Balay       if (!rank) {
78057b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
78157b952d6SSatish Balay       }
78257b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
78357b952d6SSatish Balay     }
78457b952d6SSatish Balay   }
78557b952d6SSatish Balay   return 0;
78657b952d6SSatish Balay }
78757b952d6SSatish Balay 
78857b952d6SSatish Balay 
78957b952d6SSatish Balay 
7905615d1e5SSatish Balay #undef __FUNC__
7915615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
792ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
79357b952d6SSatish Balay {
79457b952d6SSatish Balay   Mat         mat = (Mat) obj;
79557b952d6SSatish Balay   int         ierr;
79657b952d6SSatish Balay   ViewerType  vtype;
79757b952d6SSatish Balay 
79857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
79957b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
80057b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
80157b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
80257b952d6SSatish Balay   }
80357b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
80457b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
80557b952d6SSatish Balay   }
80657b952d6SSatish Balay   return 0;
80757b952d6SSatish Balay }
80857b952d6SSatish Balay 
8095615d1e5SSatish Balay #undef __FUNC__
8105615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
811ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
81279bdfe76SSatish Balay {
81379bdfe76SSatish Balay   Mat         mat = (Mat) obj;
81479bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
81579bdfe76SSatish Balay   int         ierr;
81679bdfe76SSatish Balay 
81779bdfe76SSatish Balay #if defined(PETSC_LOG)
81879bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
81979bdfe76SSatish Balay #endif
82079bdfe76SSatish Balay 
82179bdfe76SSatish Balay   PetscFree(baij->rowners);
82279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
82379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
82479bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
82579bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
82679bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
82779bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
82879bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
82930793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
83079bdfe76SSatish Balay   PetscFree(baij);
83190f02eecSBarry Smith   if (mat->mapping) {
83290f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
83390f02eecSBarry Smith   }
83479bdfe76SSatish Balay   PLogObjectDestroy(mat);
83579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
83679bdfe76SSatish Balay   return 0;
83779bdfe76SSatish Balay }
83879bdfe76SSatish Balay 
8395615d1e5SSatish Balay #undef __FUNC__
8405615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
841ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
842cee3aa6bSSatish Balay {
843cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
84447b4a8eaSLois Curfman McInnes   int         ierr, nt;
845cee3aa6bSSatish Balay 
846c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
84747b4a8eaSLois Curfman McInnes   if (nt != a->n) {
848ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
84947b4a8eaSLois Curfman McInnes   }
850c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
85147b4a8eaSLois Curfman McInnes   if (nt != a->m) {
852e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
85347b4a8eaSLois Curfman McInnes   }
85443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
855cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
85643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
857cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
85843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
859cee3aa6bSSatish Balay   return 0;
860cee3aa6bSSatish Balay }
861cee3aa6bSSatish Balay 
8625615d1e5SSatish Balay #undef __FUNC__
8635615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
864ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
865cee3aa6bSSatish Balay {
866cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
867cee3aa6bSSatish Balay   int        ierr;
86843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
869cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
87043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
871cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
872cee3aa6bSSatish Balay   return 0;
873cee3aa6bSSatish Balay }
874cee3aa6bSSatish Balay 
8755615d1e5SSatish Balay #undef __FUNC__
8765615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
877ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
878cee3aa6bSSatish Balay {
879cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
880cee3aa6bSSatish Balay   int        ierr;
881cee3aa6bSSatish Balay 
882cee3aa6bSSatish Balay   /* do nondiagonal part */
883cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
884cee3aa6bSSatish Balay   /* send it on its way */
885537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
886cee3aa6bSSatish Balay   /* do local part */
887cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
888cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
889cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
890cee3aa6bSSatish Balay   /* but is not perhaps always true. */
891639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
892cee3aa6bSSatish Balay   return 0;
893cee3aa6bSSatish Balay }
894cee3aa6bSSatish Balay 
8955615d1e5SSatish Balay #undef __FUNC__
8965615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
897ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
898cee3aa6bSSatish Balay {
899cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
900cee3aa6bSSatish Balay   int        ierr;
901cee3aa6bSSatish Balay 
902cee3aa6bSSatish Balay   /* do nondiagonal part */
903cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
904cee3aa6bSSatish Balay   /* send it on its way */
905537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
906cee3aa6bSSatish Balay   /* do local part */
907cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
908cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
909cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
910cee3aa6bSSatish Balay   /* but is not perhaps always true. */
911537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
912cee3aa6bSSatish Balay   return 0;
913cee3aa6bSSatish Balay }
914cee3aa6bSSatish Balay 
915cee3aa6bSSatish Balay /*
916cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
917cee3aa6bSSatish Balay    diagonal block
918cee3aa6bSSatish Balay */
9195615d1e5SSatish Balay #undef __FUNC__
9205615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
921ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
922cee3aa6bSSatish Balay {
923cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
924cee3aa6bSSatish Balay   if (a->M != a->N)
925e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
926cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
927cee3aa6bSSatish Balay }
928cee3aa6bSSatish Balay 
9295615d1e5SSatish Balay #undef __FUNC__
9305615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
931ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
932cee3aa6bSSatish Balay {
933cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
934cee3aa6bSSatish Balay   int        ierr;
935cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
936cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
937cee3aa6bSSatish Balay   return 0;
938cee3aa6bSSatish Balay }
939026e39d0SSatish Balay 
9405615d1e5SSatish Balay #undef __FUNC__
9415615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
942ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
94357b952d6SSatish Balay {
94457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
94557b952d6SSatish Balay   *m = mat->M; *n = mat->N;
94657b952d6SSatish Balay   return 0;
94757b952d6SSatish Balay }
94857b952d6SSatish Balay 
9495615d1e5SSatish Balay #undef __FUNC__
9505615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
951ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
95257b952d6SSatish Balay {
95357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
95457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
95557b952d6SSatish Balay   return 0;
95657b952d6SSatish Balay }
95757b952d6SSatish Balay 
9585615d1e5SSatish Balay #undef __FUNC__
9595615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
960ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
96157b952d6SSatish Balay {
96257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
96457b952d6SSatish Balay   return 0;
96557b952d6SSatish Balay }
96657b952d6SSatish Balay 
967acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
968acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
969acdf5bf4SSatish Balay 
9705615d1e5SSatish Balay #undef __FUNC__
9715615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
972acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
973acdf5bf4SSatish Balay {
974acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
975acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
976acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
977d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
978d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
979acdf5bf4SSatish Balay 
980e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
981acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
982acdf5bf4SSatish Balay 
983acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
984acdf5bf4SSatish Balay     /*
985acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
986acdf5bf4SSatish Balay     */
987acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
988bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
989bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
990acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
991acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
992acdf5bf4SSatish Balay     }
993acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
994acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
995acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
996acdf5bf4SSatish Balay   }
997acdf5bf4SSatish Balay 
998acdf5bf4SSatish Balay 
999e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1000d9d09a02SSatish Balay   lrow = row - brstart;
1001acdf5bf4SSatish Balay 
1002acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1003acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1004acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1005acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1006acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1007acdf5bf4SSatish Balay   nztot = nzA + nzB;
1008acdf5bf4SSatish Balay 
1009acdf5bf4SSatish Balay   cmap  = mat->garray;
1010acdf5bf4SSatish Balay   if (v  || idx) {
1011acdf5bf4SSatish Balay     if (nztot) {
1012acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1013acdf5bf4SSatish Balay       int imark = -1;
1014acdf5bf4SSatish Balay       if (v) {
1015acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1016acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1017d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1018acdf5bf4SSatish Balay           else break;
1019acdf5bf4SSatish Balay         }
1020acdf5bf4SSatish Balay         imark = i;
1021acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1022acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1023acdf5bf4SSatish Balay       }
1024acdf5bf4SSatish Balay       if (idx) {
1025acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1026acdf5bf4SSatish Balay         if (imark > -1) {
1027acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1028bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1029acdf5bf4SSatish Balay           }
1030acdf5bf4SSatish Balay         } else {
1031acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1032d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1033d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1034acdf5bf4SSatish Balay             else break;
1035acdf5bf4SSatish Balay           }
1036acdf5bf4SSatish Balay           imark = i;
1037acdf5bf4SSatish Balay         }
1038d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1039d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1040acdf5bf4SSatish Balay       }
1041acdf5bf4SSatish Balay     }
1042d212a18eSSatish Balay     else {
1043d212a18eSSatish Balay       if (idx) *idx = 0;
1044d212a18eSSatish Balay       if (v)   *v   = 0;
1045d212a18eSSatish Balay     }
1046acdf5bf4SSatish Balay   }
1047acdf5bf4SSatish Balay   *nz = nztot;
1048acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1049acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1050acdf5bf4SSatish Balay   return 0;
1051acdf5bf4SSatish Balay }
1052acdf5bf4SSatish Balay 
10535615d1e5SSatish Balay #undef __FUNC__
10545615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1055acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1056acdf5bf4SSatish Balay {
1057acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1058acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1059e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1060acdf5bf4SSatish Balay   }
1061acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1062acdf5bf4SSatish Balay   return 0;
1063acdf5bf4SSatish Balay }
1064acdf5bf4SSatish Balay 
10655615d1e5SSatish Balay #undef __FUNC__
10665615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1067ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10685a838052SSatish Balay {
10695a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10705a838052SSatish Balay   *bs = baij->bs;
10715a838052SSatish Balay   return 0;
10725a838052SSatish Balay }
10735a838052SSatish Balay 
10745615d1e5SSatish Balay #undef __FUNC__
10755615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1076ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
107758667388SSatish Balay {
107858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
107958667388SSatish Balay   int         ierr;
108058667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
108158667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
108258667388SSatish Balay   return 0;
108358667388SSatish Balay }
10840ac07820SSatish Balay 
10855615d1e5SSatish Balay #undef __FUNC__
10865615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1087ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10880ac07820SSatish Balay {
10894e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10904e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10917d57db60SLois Curfman McInnes   int         ierr;
10927d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10930ac07820SSatish Balay 
10944e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10954e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10964e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10974e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10984e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10994e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11004e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11014e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11024e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11030ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11044e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11054e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11064e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11074e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11084e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1110dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11114e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11124e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11134e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11144e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11154e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11160ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1117dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11184e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11194e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11204e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11214e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11224e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11230ac07820SSatish Balay   }
11244e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11254e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11264e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11270ac07820SSatish Balay   return 0;
11280ac07820SSatish Balay }
11290ac07820SSatish Balay 
11305615d1e5SSatish Balay #undef __FUNC__
11315615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1132ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
113358667388SSatish Balay {
113458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
113558667388SSatish Balay 
113658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
113758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11386da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1139c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1140c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1141b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1142b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1143b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1144aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
114558667388SSatish Balay         MatSetOption(a->A,op);
114658667388SSatish Balay         MatSetOption(a->B,op);
1147b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11486da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
114958667388SSatish Balay              op == MAT_SYMMETRIC ||
115058667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
115158667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
115258667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
115358667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
115458667388SSatish Balay     a->roworiented = 0;
115558667388SSatish Balay     MatSetOption(a->A,op);
115658667388SSatish Balay     MatSetOption(a->B,op);
1157*2b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
115890f02eecSBarry Smith     a->donotstash = 1;
115990f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1160e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
116158667388SSatish Balay   else
1162e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
116358667388SSatish Balay   return 0;
116458667388SSatish Balay }
116558667388SSatish Balay 
11665615d1e5SSatish Balay #undef __FUNC__
11675615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1168ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11690ac07820SSatish Balay {
11700ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11710ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11720ac07820SSatish Balay   Mat        B;
11730ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11740ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11750ac07820SSatish Balay   Scalar     *a;
11760ac07820SSatish Balay 
11770ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1178e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11790ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11800ac07820SSatish Balay   CHKERRQ(ierr);
11810ac07820SSatish Balay 
11820ac07820SSatish Balay   /* copy over the A part */
11830ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11840ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11850ac07820SSatish Balay   row = baij->rstart;
11860ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11870ac07820SSatish Balay 
11880ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11890ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11900ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11910ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11920ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11930ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11940ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11950ac07820SSatish Balay         col++; a += bs;
11960ac07820SSatish Balay       }
11970ac07820SSatish Balay     }
11980ac07820SSatish Balay   }
11990ac07820SSatish Balay   /* copy over the B part */
12000ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12010ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12020ac07820SSatish Balay   row = baij->rstart*bs;
12030ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12040ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12050ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12060ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12070ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12080ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12090ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12100ac07820SSatish Balay         col++; a += bs;
12110ac07820SSatish Balay       }
12120ac07820SSatish Balay     }
12130ac07820SSatish Balay   }
12140ac07820SSatish Balay   PetscFree(rvals);
12150ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12160ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12170ac07820SSatish Balay 
12180ac07820SSatish Balay   if (matout != PETSC_NULL) {
12190ac07820SSatish Balay     *matout = B;
12200ac07820SSatish Balay   } else {
12210ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12220ac07820SSatish Balay     PetscFree(baij->rowners);
12230ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12240ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12250ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12260ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12270ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12280ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12290ac07820SSatish Balay     PetscFree(baij);
12300ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
12310ac07820SSatish Balay     PetscHeaderDestroy(B);
12320ac07820SSatish Balay   }
12330ac07820SSatish Balay   return 0;
12340ac07820SSatish Balay }
12350e95ebc0SSatish Balay 
12365615d1e5SSatish Balay #undef __FUNC__
12375615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12380e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12390e95ebc0SSatish Balay {
12400e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12410e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12420e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12430e95ebc0SSatish Balay 
12440e95ebc0SSatish Balay   if (ll)  {
12450e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12460e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1247e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12480e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12490e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12500e95ebc0SSatish Balay   }
1251e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12520e95ebc0SSatish Balay   return 0;
12530e95ebc0SSatish Balay }
12540e95ebc0SSatish Balay 
12550ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12560ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12570ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12580ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12590ac07820SSatish Balay    routine.
12600ac07820SSatish Balay */
12615615d1e5SSatish Balay #undef __FUNC__
12625615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1263ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12640ac07820SSatish Balay {
12650ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12660ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12670ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12680ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12690ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12700ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12710ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12720ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12730ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12740ac07820SSatish Balay   IS             istmp;
12750ac07820SSatish Balay 
12760ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12770ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12780ac07820SSatish Balay 
12790ac07820SSatish Balay   /*  first count number of contributors to each processor */
12800ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12810ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12820ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12830ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12840ac07820SSatish Balay     idx = rows[i];
12850ac07820SSatish Balay     found = 0;
12860ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12870ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12880ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12890ac07820SSatish Balay       }
12900ac07820SSatish Balay     }
1291e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12920ac07820SSatish Balay   }
12930ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12940ac07820SSatish Balay 
12950ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12960ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12970ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12980ac07820SSatish Balay   nrecvs = work[rank];
12990ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13000ac07820SSatish Balay   nmax = work[rank];
13010ac07820SSatish Balay   PetscFree(work);
13020ac07820SSatish Balay 
13030ac07820SSatish Balay   /* post receives:   */
13040ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13050ac07820SSatish Balay   CHKPTRQ(rvalues);
13060ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13070ac07820SSatish Balay   CHKPTRQ(recv_waits);
13080ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13090ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13100ac07820SSatish Balay   }
13110ac07820SSatish Balay 
13120ac07820SSatish Balay   /* do sends:
13130ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13140ac07820SSatish Balay          the ith processor
13150ac07820SSatish Balay   */
13160ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13170ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13180ac07820SSatish Balay   CHKPTRQ(send_waits);
13190ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13200ac07820SSatish Balay   starts[0] = 0;
13210ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13220ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13230ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13240ac07820SSatish Balay   }
13250ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13260ac07820SSatish Balay 
13270ac07820SSatish Balay   starts[0] = 0;
13280ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13290ac07820SSatish Balay   count = 0;
13300ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13310ac07820SSatish Balay     if (procs[i]) {
13320ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13330ac07820SSatish Balay     }
13340ac07820SSatish Balay   }
13350ac07820SSatish Balay   PetscFree(starts);
13360ac07820SSatish Balay 
13370ac07820SSatish Balay   base = owners[rank]*bs;
13380ac07820SSatish Balay 
13390ac07820SSatish Balay   /*  wait on receives */
13400ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13410ac07820SSatish Balay   source = lens + nrecvs;
13420ac07820SSatish Balay   count  = nrecvs; slen = 0;
13430ac07820SSatish Balay   while (count) {
13440ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13450ac07820SSatish Balay     /* unpack receives into our local space */
13460ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13470ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13480ac07820SSatish Balay     lens[imdex]  = n;
13490ac07820SSatish Balay     slen += n;
13500ac07820SSatish Balay     count--;
13510ac07820SSatish Balay   }
13520ac07820SSatish Balay   PetscFree(recv_waits);
13530ac07820SSatish Balay 
13540ac07820SSatish Balay   /* move the data into the send scatter */
13550ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13560ac07820SSatish Balay   count = 0;
13570ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13580ac07820SSatish Balay     values = rvalues + i*nmax;
13590ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13600ac07820SSatish Balay       lrows[count++] = values[j] - base;
13610ac07820SSatish Balay     }
13620ac07820SSatish Balay   }
13630ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13640ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13650ac07820SSatish Balay 
13660ac07820SSatish Balay   /* actually zap the local rows */
1367537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13680ac07820SSatish Balay   PLogObjectParent(A,istmp);
13690ac07820SSatish Balay   PetscFree(lrows);
13700ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13710ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13720ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13730ac07820SSatish Balay 
13740ac07820SSatish Balay   /* wait on sends */
13750ac07820SSatish Balay   if (nsends) {
13760ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13770ac07820SSatish Balay     CHKPTRQ(send_status);
13780ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13790ac07820SSatish Balay     PetscFree(send_status);
13800ac07820SSatish Balay   }
13810ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13820ac07820SSatish Balay 
13830ac07820SSatish Balay   return 0;
13840ac07820SSatish Balay }
1385ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13865615d1e5SSatish Balay #undef __FUNC__
13875615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1388ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1389ba4ca20aSSatish Balay {
1390ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1391ba4ca20aSSatish Balay 
1392ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1393ba4ca20aSSatish Balay   else return 0;
1394ba4ca20aSSatish Balay }
13950ac07820SSatish Balay 
13965615d1e5SSatish Balay #undef __FUNC__
13975615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1398ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1399bb5a7306SBarry Smith {
1400bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1401bb5a7306SBarry Smith   int         ierr;
1402bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1403bb5a7306SBarry Smith   return 0;
1404bb5a7306SBarry Smith }
1405bb5a7306SBarry Smith 
14060ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14070ac07820SSatish Balay 
140879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
140979bdfe76SSatish Balay static struct _MatOps MatOps = {
1410bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14114c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14124c50302cSBarry Smith   0,0,0,0,
14130ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14140e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
141558667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14164c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14174c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14184c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
141994a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1420d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1421ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1422bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1423ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
142479bdfe76SSatish Balay 
142579bdfe76SSatish Balay 
14265615d1e5SSatish Balay #undef __FUNC__
14275615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
142879bdfe76SSatish Balay /*@C
142979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
143079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
143179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
143279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
143379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
143479bdfe76SSatish Balay 
143579bdfe76SSatish Balay    Input Parameters:
143679bdfe76SSatish Balay .  comm - MPI communicator
143779bdfe76SSatish Balay .  bs   - size of blockk
143879bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
143992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
144092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
144192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
144292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
144392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
144479bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
144592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
144679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
144779bdfe76SSatish Balay            submatrix  (same for all local rows)
144892e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
144992e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
145092e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
145192e8d321SLois Curfman McInnes            it is zero.
145292e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
145379bdfe76SSatish Balay            submatrix (same for all local rows).
145492e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
145592e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
145692e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
145779bdfe76SSatish Balay 
145879bdfe76SSatish Balay    Output Parameter:
145979bdfe76SSatish Balay .  A - the matrix
146079bdfe76SSatish Balay 
146179bdfe76SSatish Balay    Notes:
146279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
146379bdfe76SSatish Balay    (possibly both).
146479bdfe76SSatish Balay 
146579bdfe76SSatish Balay    Storage Information:
146679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
146779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
146879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
146979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
147079bdfe76SSatish Balay 
147179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
147279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
147379bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
147479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
147579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
147679bdfe76SSatish Balay 
147779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
147879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
147979bdfe76SSatish Balay 
148079bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
148179bdfe76SSatish Balay $         -------------------
148279bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
148379bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
148479bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
148579bdfe76SSatish Balay $         -------------------
148679bdfe76SSatish Balay $
148779bdfe76SSatish Balay 
148879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
148979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
149079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
149157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
149279bdfe76SSatish Balay 
149379bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
149479bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
149579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
149692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
149792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14986da5968aSLois Curfman McInnes    matrices.
149979bdfe76SSatish Balay 
150092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
150179bdfe76SSatish Balay 
150279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
150379bdfe76SSatish Balay @*/
150479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
150579bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
150679bdfe76SSatish Balay {
150779bdfe76SSatish Balay   Mat          B;
150879bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15094c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
151079bdfe76SSatish Balay 
1511e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
151279bdfe76SSatish Balay   *A = 0;
151379bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
151479bdfe76SSatish Balay   PLogObjectCreate(B);
151579bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
151679bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
151779bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15184c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15194c50302cSBarry Smith   if (size == 1) {
15204c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15214c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15224c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15234c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15244c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15254c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15264c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15274c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15284c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15294c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15304c50302cSBarry Smith   }
15314c50302cSBarry Smith 
153279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
153379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
153490f02eecSBarry Smith   B->mapping    = 0;
153579bdfe76SSatish Balay   B->factor     = 0;
153679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
153779bdfe76SSatish Balay 
1538e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
153979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
154079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
154179bdfe76SSatish Balay 
154279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1543e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1544e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1545e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1546cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1547cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
154879bdfe76SSatish Balay 
154979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
155079bdfe76SSatish Balay     work[0] = m; work[1] = n;
155179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
155279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
155379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
155479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
155579bdfe76SSatish Balay   }
155679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
155779bdfe76SSatish Balay     Mbs = M/bs;
1558e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
155979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
156079bdfe76SSatish Balay     m   = mbs*bs;
156179bdfe76SSatish Balay   }
156279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
156379bdfe76SSatish Balay     Nbs = N/bs;
1564e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
156579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
156679bdfe76SSatish Balay     n   = nbs*bs;
156779bdfe76SSatish Balay   }
1568e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
156979bdfe76SSatish Balay 
157079bdfe76SSatish Balay   b->m = m; B->m = m;
157179bdfe76SSatish Balay   b->n = n; B->n = n;
157279bdfe76SSatish Balay   b->N = N; B->N = N;
157379bdfe76SSatish Balay   b->M = M; B->M = M;
157479bdfe76SSatish Balay   b->bs  = bs;
157579bdfe76SSatish Balay   b->bs2 = bs*bs;
157679bdfe76SSatish Balay   b->mbs = mbs;
157779bdfe76SSatish Balay   b->nbs = nbs;
157879bdfe76SSatish Balay   b->Mbs = Mbs;
157979bdfe76SSatish Balay   b->Nbs = Nbs;
158079bdfe76SSatish Balay 
158179bdfe76SSatish Balay   /* build local table of row and column ownerships */
158279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
158379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15840ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
158579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
158679bdfe76SSatish Balay   b->rowners[0] = 0;
158779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
158879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
158979bdfe76SSatish Balay   }
159079bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
159179bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15924fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15934fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15944fa0d573SSatish Balay 
159579bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
159679bdfe76SSatish Balay   b->cowners[0] = 0;
159779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
159879bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
159979bdfe76SSatish Balay   }
160079bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
160179bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16024fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16034fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
160479bdfe76SSatish Balay 
160579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
160679bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
160779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
160879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
160979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
161079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
161179bdfe76SSatish Balay 
161279bdfe76SSatish Balay   /* build cache for off array entries formed */
161379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
161490f02eecSBarry Smith   b->donotstash  = 0;
161579bdfe76SSatish Balay   b->colmap      = 0;
161679bdfe76SSatish Balay   b->garray      = 0;
161779bdfe76SSatish Balay   b->roworiented = 1;
161879bdfe76SSatish Balay 
161930793edcSSatish Balay   /* stuff used in block assembly */
162030793edcSSatish Balay   b->barray      = 0;
162130793edcSSatish Balay 
162279bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
162379bdfe76SSatish Balay   b->lvec        = 0;
162479bdfe76SSatish Balay   b->Mvctx       = 0;
162579bdfe76SSatish Balay 
162679bdfe76SSatish Balay   /* stuff for MatGetRow() */
162779bdfe76SSatish Balay   b->rowindices   = 0;
162879bdfe76SSatish Balay   b->rowvalues    = 0;
162979bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
163079bdfe76SSatish Balay 
163179bdfe76SSatish Balay   *A = B;
163279bdfe76SSatish Balay   return 0;
163379bdfe76SSatish Balay }
1634026e39d0SSatish Balay 
16355615d1e5SSatish Balay #undef __FUNC__
16365615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16370ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16380ac07820SSatish Balay {
16390ac07820SSatish Balay   Mat         mat;
16400ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16410ac07820SSatish Balay   int         ierr, len=0, flg;
16420ac07820SSatish Balay 
16430ac07820SSatish Balay   *newmat       = 0;
16440ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16450ac07820SSatish Balay   PLogObjectCreate(mat);
16460ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16470ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16480ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16490ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16500ac07820SSatish Balay   mat->factor     = matin->factor;
16510ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16520ac07820SSatish Balay 
16530ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16540ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16550ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16560ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16570ac07820SSatish Balay 
16580ac07820SSatish Balay   a->bs  = oldmat->bs;
16590ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16600ac07820SSatish Balay   a->mbs = oldmat->mbs;
16610ac07820SSatish Balay   a->nbs = oldmat->nbs;
16620ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16630ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16640ac07820SSatish Balay 
16650ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16660ac07820SSatish Balay   a->rend         = oldmat->rend;
16670ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16680ac07820SSatish Balay   a->cend         = oldmat->cend;
16690ac07820SSatish Balay   a->size         = oldmat->size;
16700ac07820SSatish Balay   a->rank         = oldmat->rank;
1671e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16720ac07820SSatish Balay   a->rowvalues    = 0;
16730ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
167430793edcSSatish Balay   a->barray       = 0;
16750ac07820SSatish Balay 
16760ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
16770ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
16780ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16790ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16800ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16810ac07820SSatish Balay   if (oldmat->colmap) {
16820ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16830ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16840ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16850ac07820SSatish Balay   } else a->colmap = 0;
16860ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16870ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16880ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16890ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16900ac07820SSatish Balay   } else a->garray = 0;
16910ac07820SSatish Balay 
16920ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16930ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16940ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16950ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16960ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16970ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16980ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
16990ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17000ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17010ac07820SSatish Balay   if (flg) {
17020ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17030ac07820SSatish Balay   }
17040ac07820SSatish Balay   *newmat = mat;
17050ac07820SSatish Balay   return 0;
17060ac07820SSatish Balay }
170757b952d6SSatish Balay 
170857b952d6SSatish Balay #include "sys.h"
170957b952d6SSatish Balay 
17105615d1e5SSatish Balay #undef __FUNC__
17115615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
171257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
171357b952d6SSatish Balay {
171457b952d6SSatish Balay   Mat          A;
171557b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
171657b952d6SSatish Balay   Scalar       *vals,*buf;
171757b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
171857b952d6SSatish Balay   MPI_Status   status;
1719cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
172057b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
172157b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
172257b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
172357b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
172457b952d6SSatish Balay 
172557b952d6SSatish Balay 
172657b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
172757b952d6SSatish Balay   bs2  = bs*bs;
172857b952d6SSatish Balay 
172957b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
173057b952d6SSatish Balay   if (!rank) {
173157b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
173257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1733e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
173457b952d6SSatish Balay   }
173557b952d6SSatish Balay 
173657b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
173757b952d6SSatish Balay   M = header[1]; N = header[2];
173857b952d6SSatish Balay 
1739e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
174057b952d6SSatish Balay 
174157b952d6SSatish Balay   /*
174257b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
174357b952d6SSatish Balay      divisible by the blocksize
174457b952d6SSatish Balay   */
174557b952d6SSatish Balay   Mbs        = M/bs;
174657b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
174757b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
174857b952d6SSatish Balay   else                  Mbs++;
174957b952d6SSatish Balay   if (extra_rows &&!rank) {
1750b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
175157b952d6SSatish Balay   }
1752537820f0SBarry Smith 
175357b952d6SSatish Balay   /* determine ownership of all rows */
175457b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
175557b952d6SSatish Balay   m   = mbs * bs;
1756cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1757cee3aa6bSSatish Balay   browners = rowners + size + 1;
175857b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
175957b952d6SSatish Balay   rowners[0] = 0;
1760cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1761cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
176257b952d6SSatish Balay   rstart = rowners[rank];
176357b952d6SSatish Balay   rend   = rowners[rank+1];
176457b952d6SSatish Balay 
176557b952d6SSatish Balay   /* distribute row lengths to all processors */
176657b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
176757b952d6SSatish Balay   if (!rank) {
176857b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
176957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
177057b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
177157b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1772cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1773cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
177457b952d6SSatish Balay     PetscFree(sndcounts);
177557b952d6SSatish Balay   }
177657b952d6SSatish Balay   else {
177757b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
177857b952d6SSatish Balay   }
177957b952d6SSatish Balay 
178057b952d6SSatish Balay   if (!rank) {
178157b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
178257b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
178357b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
178457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
178557b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
178657b952d6SSatish Balay         procsnz[i] += rowlengths[j];
178757b952d6SSatish Balay       }
178857b952d6SSatish Balay     }
178957b952d6SSatish Balay     PetscFree(rowlengths);
179057b952d6SSatish Balay 
179157b952d6SSatish Balay     /* determine max buffer needed and allocate it */
179257b952d6SSatish Balay     maxnz = 0;
179357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
179457b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
179557b952d6SSatish Balay     }
179657b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
179757b952d6SSatish Balay 
179857b952d6SSatish Balay     /* read in my part of the matrix column indices  */
179957b952d6SSatish Balay     nz = procsnz[0];
180057b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
180157b952d6SSatish Balay     mycols = ibuf;
1802cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
180357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1804cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1805cee3aa6bSSatish Balay 
180657b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
180757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
180857b952d6SSatish Balay       nz = procsnz[i];
180957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
181057b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
181157b952d6SSatish Balay     }
181257b952d6SSatish Balay     /* read in the stuff for the last proc */
181357b952d6SSatish Balay     if ( size != 1 ) {
181457b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
181557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
181657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1817cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
181857b952d6SSatish Balay     }
181957b952d6SSatish Balay     PetscFree(cols);
182057b952d6SSatish Balay   }
182157b952d6SSatish Balay   else {
182257b952d6SSatish Balay     /* determine buffer space needed for message */
182357b952d6SSatish Balay     nz = 0;
182457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
182557b952d6SSatish Balay       nz += locrowlens[i];
182657b952d6SSatish Balay     }
182757b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
182857b952d6SSatish Balay     mycols = ibuf;
182957b952d6SSatish Balay     /* receive message of column indices*/
183057b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
183157b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1832e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
183357b952d6SSatish Balay   }
183457b952d6SSatish Balay 
183557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1836cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1837cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
183857b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1839cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
184057b952d6SSatish Balay   masked1 = mask    + Mbs;
184157b952d6SSatish Balay   masked2 = masked1 + Mbs;
184257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
184357b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
184457b952d6SSatish Balay     dcount  = 0;
184557b952d6SSatish Balay     odcount = 0;
184657b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
184757b952d6SSatish Balay       kmax = locrowlens[rowcount];
184857b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
184957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
185057b952d6SSatish Balay         if (!mask[tmp]) {
185157b952d6SSatish Balay           mask[tmp] = 1;
185257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
185357b952d6SSatish Balay           else masked1[dcount++] = tmp;
185457b952d6SSatish Balay         }
185557b952d6SSatish Balay       }
185657b952d6SSatish Balay       rowcount++;
185757b952d6SSatish Balay     }
1858cee3aa6bSSatish Balay 
185957b952d6SSatish Balay     dlens[i]  = dcount;
186057b952d6SSatish Balay     odlens[i] = odcount;
1861cee3aa6bSSatish Balay 
186257b952d6SSatish Balay     /* zero out the mask elements we set */
186357b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
186457b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
186557b952d6SSatish Balay   }
1866cee3aa6bSSatish Balay 
186757b952d6SSatish Balay   /* create our matrix */
1868537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1869537820f0SBarry Smith          CHKERRQ(ierr);
187057b952d6SSatish Balay   A = *newmat;
18716d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
187257b952d6SSatish Balay 
187357b952d6SSatish Balay   if (!rank) {
187457b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
187557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
187657b952d6SSatish Balay     nz = procsnz[0];
187757b952d6SSatish Balay     vals = buf;
1878cee3aa6bSSatish Balay     mycols = ibuf;
1879cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
188057b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1881cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1882537820f0SBarry Smith 
188357b952d6SSatish Balay     /* insert into matrix */
188457b952d6SSatish Balay     jj      = rstart*bs;
188557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
188657b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
188757b952d6SSatish Balay       mycols += locrowlens[i];
188857b952d6SSatish Balay       vals   += locrowlens[i];
188957b952d6SSatish Balay       jj++;
189057b952d6SSatish Balay     }
189157b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
189257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
189357b952d6SSatish Balay       nz = procsnz[i];
189457b952d6SSatish Balay       vals = buf;
189557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
189657b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
189757b952d6SSatish Balay     }
189857b952d6SSatish Balay     /* the last proc */
189957b952d6SSatish Balay     if ( size != 1 ){
190057b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1901cee3aa6bSSatish Balay       vals = buf;
190257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
190357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1904cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
190557b952d6SSatish Balay     }
190657b952d6SSatish Balay     PetscFree(procsnz);
190757b952d6SSatish Balay   }
190857b952d6SSatish Balay   else {
190957b952d6SSatish Balay     /* receive numeric values */
191057b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
191157b952d6SSatish Balay 
191257b952d6SSatish Balay     /* receive message of values*/
191357b952d6SSatish Balay     vals = buf;
1914cee3aa6bSSatish Balay     mycols = ibuf;
191557b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
191657b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1917e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
191857b952d6SSatish Balay 
191957b952d6SSatish Balay     /* insert into matrix */
192057b952d6SSatish Balay     jj      = rstart*bs;
1921cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
192257b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
192357b952d6SSatish Balay       mycols += locrowlens[i];
192457b952d6SSatish Balay       vals   += locrowlens[i];
192557b952d6SSatish Balay       jj++;
192657b952d6SSatish Balay     }
192757b952d6SSatish Balay   }
192857b952d6SSatish Balay   PetscFree(locrowlens);
192957b952d6SSatish Balay   PetscFree(buf);
193057b952d6SSatish Balay   PetscFree(ibuf);
193157b952d6SSatish Balay   PetscFree(rowners);
193257b952d6SSatish Balay   PetscFree(dlens);
1933cee3aa6bSSatish Balay   PetscFree(mask);
19346d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19356d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
193657b952d6SSatish Balay   return 0;
193757b952d6SSatish Balay }
193857b952d6SSatish Balay 
193957b952d6SSatish Balay 
1940