xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ac7a638e666390a48434b28bf12ee15429c96bff)
179bdfe76SSatish Balay #ifndef lint
2*ac7a638eSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.60 1997/03/27 20:43:27 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
3857b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77*ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92*ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
95*ac7a638eSSatish Balay       if (a->nonew) goto a_noinsert; \
9680c1aa95SSatish Balay       if (nrow >= rmax) { \
9780c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9880c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9980c1aa95SSatish Balay         Scalar *new_a; \
10080c1aa95SSatish Balay  \
10180c1aa95SSatish Balay         /* malloc new storage space */ \
10280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10680c1aa95SSatish Balay  \
10780c1aa95SSatish Balay         /* copy over old data into new slots */ \
10880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11380c1aa95SSatish Balay                                                            len*sizeof(int)); \
11480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11880c1aa95SSatish Balay         /* free up old matrix storage */ \
11980c1aa95SSatish Balay         PetscFree(a->a);  \
12080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12280c1aa95SSatish Balay         a->singlemalloc = 1; \
12380c1aa95SSatish Balay  \
12480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
125*ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12880c1aa95SSatish Balay         a->reallocs++; \
12980c1aa95SSatish Balay         a->nz++; \
13080c1aa95SSatish Balay       } \
13180c1aa95SSatish Balay       N = nrow++ - 1;  \
13280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13680c1aa95SSatish Balay       } \
13780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
140*ac7a638eSSatish Balay       a_noinsert:; \
14180c1aa95SSatish Balay     ailen[brow] = nrow; \
14280c1aa95SSatish Balay }
14357b952d6SSatish Balay 
144*ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
145*ac7a638eSSatish Balay { \
146*ac7a638eSSatish Balay  \
147*ac7a638eSSatish Balay     brow = row/bs;  \
148*ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
149*ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
150*ac7a638eSSatish Balay       bcol = col/bs; \
151*ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
152*ac7a638eSSatish Balay       low = 0; high = nrow; \
153*ac7a638eSSatish Balay       while (high-low > 3) { \
154*ac7a638eSSatish Balay         t = (low+high)/2; \
155*ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
156*ac7a638eSSatish Balay         else              low  = t; \
157*ac7a638eSSatish Balay       } \
158*ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
159*ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
160*ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
161*ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
162*ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
163*ac7a638eSSatish Balay           else                    *bap  = value;  \
164*ac7a638eSSatish Balay           goto b_noinsert; \
165*ac7a638eSSatish Balay         } \
166*ac7a638eSSatish Balay       } \
167*ac7a638eSSatish Balay       if (a->nonew) goto b_noinsert; \
168*ac7a638eSSatish Balay       if (nrow >= rmax) { \
169*ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
170*ac7a638eSSatish Balay         int    new_nz = bi[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
171*ac7a638eSSatish Balay         Scalar *new_a; \
172*ac7a638eSSatish Balay  \
173*ac7a638eSSatish Balay         /* malloc new storage space */ \
174*ac7a638eSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
175*ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
176*ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
177*ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
178*ac7a638eSSatish Balay  \
179*ac7a638eSSatish Balay         /* copy over old data into new slots */ \
180*ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
181*ac7a638eSSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
182*ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
183*ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
184*ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
185*ac7a638eSSatish Balay                                                            len*sizeof(int)); \
186*ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
187*ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
188*ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
189*ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
190*ac7a638eSSatish Balay         /* free up old matrix storage */ \
191*ac7a638eSSatish Balay         PetscFree(a->a);  \
192*ac7a638eSSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
193*ac7a638eSSatish Balay         ba = a->a = new_a; bi = a->i = new_i; bj = a->j = new_j;  \
194*ac7a638eSSatish Balay         a->singlemalloc = 1; \
195*ac7a638eSSatish Balay  \
196*ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
197*ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
198*ac7a638eSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
199*ac7a638eSSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
200*ac7a638eSSatish Balay         a->reallocs++; \
201*ac7a638eSSatish Balay         a->nz++; \
202*ac7a638eSSatish Balay       } \
203*ac7a638eSSatish Balay       N = nrow++ - 1;  \
204*ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
205*ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
206*ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
207*ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
208*ac7a638eSSatish Balay       } \
209*ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
210*ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
211*ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
212*ac7a638eSSatish Balay       b_noinsert:; \
213*ac7a638eSSatish Balay     bilen[brow] = nrow; \
214*ac7a638eSSatish Balay }
215*ac7a638eSSatish Balay 
2165615d1e5SSatish Balay #undef __FUNC__
2175615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
218ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
21957b952d6SSatish Balay {
22057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22157b952d6SSatish Balay   Scalar      value;
2224fa0d573SSatish Balay   int         ierr,i,j,row,col;
2234fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2244fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2254fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
22657b952d6SSatish Balay 
227eada6651SSatish Balay   /* Some Variables required in the macro */
22880c1aa95SSatish Balay   Mat         A = baij->A;
22980c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
230*ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
231*ac7a638eSSatish Balay   Scalar      *aa=a->a;
232*ac7a638eSSatish Balay 
233*ac7a638eSSatish Balay   Mat         B = baij->B;
234*ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
235*ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
236*ac7a638eSSatish Balay   Scalar      *ba=b->a;
237*ac7a638eSSatish Balay 
238*ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
239ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
240*ac7a638eSSatish Balay   Scalar      *ap,*bap;
24180c1aa95SSatish Balay 
24257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
243639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
244e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
245e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
246639f9d9dSBarry Smith #endif
24757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
24857b952d6SSatish Balay       row = im[i] - rstart_orig;
24957b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25157b952d6SSatish Balay           col = in[j] - cstart_orig;
25257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
253f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
25480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25557b952d6SSatish Balay         }
256639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
257e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
258e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
259639f9d9dSBarry Smith #endif
26057b952d6SSatish Balay         else {
26157b952d6SSatish Balay           if (mat->was_assembled) {
262905e6a2fSBarry Smith             if (!baij->colmap) {
263905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
264905e6a2fSBarry Smith             }
265905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
26657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
26757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26857b952d6SSatish Balay               col =  in[j];
26957b952d6SSatish Balay             }
27057b952d6SSatish Balay           }
27157b952d6SSatish Balay           else col = in[j];
27257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273*ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
274*ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27557b952d6SSatish Balay         }
27657b952d6SSatish Balay       }
27757b952d6SSatish Balay     }
27857b952d6SSatish Balay     else {
27990f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
28057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
28157b952d6SSatish Balay       }
28257b952d6SSatish Balay       else {
28390f02eecSBarry Smith         if (!baij->donotstash) {
28457b952d6SSatish Balay           row = im[i];
28557b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
28657b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
28757b952d6SSatish Balay           }
28857b952d6SSatish Balay         }
28957b952d6SSatish Balay       }
29057b952d6SSatish Balay     }
29190f02eecSBarry Smith   }
29257b952d6SSatish Balay   return 0;
29357b952d6SSatish Balay }
29457b952d6SSatish Balay 
295ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
296ab26458aSBarry Smith #undef __FUNC__
297ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
298ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
299ab26458aSBarry Smith {
300ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30130793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
302abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
303ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
304ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
305ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
306ab26458aSBarry Smith 
30730793edcSSatish Balay 
30830793edcSSatish Balay   if(!barray) {
30930793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
31030793edcSSatish Balay     baij->barray = barray;
31130793edcSSatish Balay   }
31230793edcSSatish Balay 
313ab26458aSBarry Smith   if (roworiented) {
314ab26458aSBarry Smith     stepval = (n-1)*bs;
315ab26458aSBarry Smith   } else {
316ab26458aSBarry Smith     stepval = (m-1)*bs;
317ab26458aSBarry Smith   }
318ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
319ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
320ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
321ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
322ab26458aSBarry Smith #endif
323ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
324ab26458aSBarry Smith       row = im[i] - rstart;
325ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
326ab26458aSBarry Smith         if (roworiented) {
327ab26458aSBarry Smith           value = v + i*(stepval+bs)*bs + j*bs;
328ab26458aSBarry Smith         } else {
329ab26458aSBarry Smith           value = v + j*(stepval+bs)*bs + i*bs;
330abef11f7SSatish Balay         }
331ab26458aSBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval )
332ab26458aSBarry Smith           for (jj=0; jj<bs; jj++ )
33330793edcSSatish Balay             *barray++  = *value++;
33430793edcSSatish Balay         barray -=bs2;
335abef11f7SSatish Balay 
336abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
337abef11f7SSatish Balay           col = in[j] - cstart;
33830793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
339ab26458aSBarry Smith         }
340ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
341ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
342ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
343ab26458aSBarry Smith #endif
344ab26458aSBarry Smith         else {
345ab26458aSBarry Smith           if (mat->was_assembled) {
346ab26458aSBarry Smith             if (!baij->colmap) {
347ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
348ab26458aSBarry Smith             }
349ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
350ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
351ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
352ab26458aSBarry Smith               col =  in[j];
353ab26458aSBarry Smith             }
354ab26458aSBarry Smith           }
355ab26458aSBarry Smith           else col = in[j];
35630793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
357ab26458aSBarry Smith         }
358ab26458aSBarry Smith       }
359ab26458aSBarry Smith     }
360ab26458aSBarry Smith     else {
361ab26458aSBarry Smith       if (!baij->donotstash) {
362ab26458aSBarry Smith         if (roworiented ) {
363abef11f7SSatish Balay           row   = im[i]*bs;
364abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
365abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
366abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
367abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
368abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
369abef11f7SSatish Balay               }
370ab26458aSBarry Smith             }
371ab26458aSBarry Smith           }
372ab26458aSBarry Smith         }
373ab26458aSBarry Smith         else {
374ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
375abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
376abef11f7SSatish Balay             col   = in[j]*bs;
377abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
378abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
379abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
380ab26458aSBarry Smith               }
381ab26458aSBarry Smith             }
382ab26458aSBarry Smith           }
383abef11f7SSatish Balay 
384abef11f7SSatish Balay         }
385abef11f7SSatish Balay       }
386ab26458aSBarry Smith     }
387ab26458aSBarry Smith   }
388ab26458aSBarry Smith   return 0;
389ab26458aSBarry Smith }
390ab26458aSBarry Smith 
3915615d1e5SSatish Balay #undef __FUNC__
3925615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
393ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
394d6de1c52SSatish Balay {
395d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
396d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
397d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
398d6de1c52SSatish Balay 
399d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
400e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
401e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
402d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
403d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
404d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
405e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
406e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
407d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
408d6de1c52SSatish Balay           col = idxn[j] - bscstart;
409d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
410d6de1c52SSatish Balay         }
411d6de1c52SSatish Balay         else {
412905e6a2fSBarry Smith           if (!baij->colmap) {
413905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
414905e6a2fSBarry Smith           }
415e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
416dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
417d9d09a02SSatish Balay           else {
418dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
419d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
420d6de1c52SSatish Balay           }
421d6de1c52SSatish Balay         }
422d6de1c52SSatish Balay       }
423d9d09a02SSatish Balay     }
424d6de1c52SSatish Balay     else {
425e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
426d6de1c52SSatish Balay     }
427d6de1c52SSatish Balay   }
428d6de1c52SSatish Balay   return 0;
429d6de1c52SSatish Balay }
430d6de1c52SSatish Balay 
4315615d1e5SSatish Balay #undef __FUNC__
4325615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
433ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
434d6de1c52SSatish Balay {
435d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
436d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
437acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
438d6de1c52SSatish Balay   double     sum = 0.0;
439d6de1c52SSatish Balay   Scalar     *v;
440d6de1c52SSatish Balay 
441d6de1c52SSatish Balay   if (baij->size == 1) {
442d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
443d6de1c52SSatish Balay   } else {
444d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
445d6de1c52SSatish Balay       v = amat->a;
446d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
447d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
448d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
449d6de1c52SSatish Balay #else
450d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
451d6de1c52SSatish Balay #endif
452d6de1c52SSatish Balay       }
453d6de1c52SSatish Balay       v = bmat->a;
454d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
455d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
456d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
457d6de1c52SSatish Balay #else
458d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
459d6de1c52SSatish Balay #endif
460d6de1c52SSatish Balay       }
461d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
462d6de1c52SSatish Balay       *norm = sqrt(*norm);
463d6de1c52SSatish Balay     }
464acdf5bf4SSatish Balay     else
465e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
466d6de1c52SSatish Balay   }
467d6de1c52SSatish Balay   return 0;
468d6de1c52SSatish Balay }
46957b952d6SSatish Balay 
4705615d1e5SSatish Balay #undef __FUNC__
4715615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
472ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
47357b952d6SSatish Balay {
47457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
47557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
47657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
47757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
47857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
47957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
48057b952d6SSatish Balay   InsertMode  addv;
48157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
48257b952d6SSatish Balay 
48357b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
484e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
48557b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
486e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
48757b952d6SSatish Balay   }
488e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
48957b952d6SSatish Balay 
49057b952d6SSatish Balay   /*  first count number of contributors to each processor */
49157b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
49257b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
49357b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
49457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
49557b952d6SSatish Balay     idx = baij->stash.idx[i];
49657b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
49757b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
49857b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
49957b952d6SSatish Balay       }
50057b952d6SSatish Balay     }
50157b952d6SSatish Balay   }
50257b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
50357b952d6SSatish Balay 
50457b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
50557b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
50657b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
50757b952d6SSatish Balay   nreceives = work[rank];
50857b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
50957b952d6SSatish Balay   nmax = work[rank];
51057b952d6SSatish Balay   PetscFree(work);
51157b952d6SSatish Balay 
51257b952d6SSatish Balay   /* post receives:
51357b952d6SSatish Balay        1) each message will consist of ordered pairs
51457b952d6SSatish Balay      (global index,value) we store the global index as a double
51557b952d6SSatish Balay      to simplify the message passing.
51657b952d6SSatish Balay        2) since we don't know how long each individual message is we
51757b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
51857b952d6SSatish Balay      this is a lot of wasted space.
51957b952d6SSatish Balay 
52057b952d6SSatish Balay 
52157b952d6SSatish Balay        This could be done better.
52257b952d6SSatish Balay   */
52357b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
52457b952d6SSatish Balay   CHKPTRQ(rvalues);
52557b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
52657b952d6SSatish Balay   CHKPTRQ(recv_waits);
52757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
52857b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
52957b952d6SSatish Balay               comm,recv_waits+i);
53057b952d6SSatish Balay   }
53157b952d6SSatish Balay 
53257b952d6SSatish Balay   /* do sends:
53357b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
53457b952d6SSatish Balay          the ith processor
53557b952d6SSatish Balay   */
53657b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
53757b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
53857b952d6SSatish Balay   CHKPTRQ(send_waits);
53957b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
54057b952d6SSatish Balay   starts[0] = 0;
54157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
54257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
54357b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
54457b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
54557b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
54657b952d6SSatish Balay   }
54757b952d6SSatish Balay   PetscFree(owner);
54857b952d6SSatish Balay   starts[0] = 0;
54957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
55057b952d6SSatish Balay   count = 0;
55157b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
55257b952d6SSatish Balay     if (procs[i]) {
55357b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
55457b952d6SSatish Balay                 comm,send_waits+count++);
55557b952d6SSatish Balay     }
55657b952d6SSatish Balay   }
55757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
55857b952d6SSatish Balay 
55957b952d6SSatish Balay   /* Free cache space */
560d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
56157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
56257b952d6SSatish Balay 
56357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
56457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
56557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
56657b952d6SSatish Balay   baij->rmax       = nmax;
56757b952d6SSatish Balay 
56857b952d6SSatish Balay   return 0;
56957b952d6SSatish Balay }
57057b952d6SSatish Balay 
57157b952d6SSatish Balay 
5725615d1e5SSatish Balay #undef __FUNC__
5735615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
574ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
57557b952d6SSatish Balay {
57657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
57757b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
57857b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
57957b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
58057b952d6SSatish Balay   Scalar      *values,val;
581e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
58257b952d6SSatish Balay 
58357b952d6SSatish Balay   /*  wait on receives */
58457b952d6SSatish Balay   while (count) {
58557b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
58657b952d6SSatish Balay     /* unpack receives into our local space */
58757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
58857b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
58957b952d6SSatish Balay     n = n/3;
59057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
59157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
59257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
59357b952d6SSatish Balay       val = values[3*i+2];
59457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
59557b952d6SSatish Balay         col -= baij->cstart*bs;
59657b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
59757b952d6SSatish Balay       }
59857b952d6SSatish Balay       else {
59957b952d6SSatish Balay         if (mat->was_assembled) {
600905e6a2fSBarry Smith           if (!baij->colmap) {
601905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
602905e6a2fSBarry Smith           }
603905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
60457b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
60557b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
60657b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
60757b952d6SSatish Balay           }
60857b952d6SSatish Balay         }
60957b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
61057b952d6SSatish Balay       }
61157b952d6SSatish Balay     }
61257b952d6SSatish Balay     count--;
61357b952d6SSatish Balay   }
61457b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
61557b952d6SSatish Balay 
61657b952d6SSatish Balay   /* wait on sends */
61757b952d6SSatish Balay   if (baij->nsends) {
61857b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
61957b952d6SSatish Balay     CHKPTRQ(send_status);
62057b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
62157b952d6SSatish Balay     PetscFree(send_status);
62257b952d6SSatish Balay   }
62357b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
62457b952d6SSatish Balay 
62557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
62657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
62757b952d6SSatish Balay 
62857b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
62957b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
63057b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
63157b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
63257b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
63357b952d6SSatish Balay   }
63457b952d6SSatish Balay 
6356d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
63657b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
63757b952d6SSatish Balay   }
63857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
63957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
64057b952d6SSatish Balay 
64157b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
64257b952d6SSatish Balay   return 0;
64357b952d6SSatish Balay }
64457b952d6SSatish Balay 
6455615d1e5SSatish Balay #undef __FUNC__
6465615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
64757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
64857b952d6SSatish Balay {
64957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
65057b952d6SSatish Balay   int          ierr;
65157b952d6SSatish Balay 
65257b952d6SSatish Balay   if (baij->size == 1) {
65357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
65457b952d6SSatish Balay   }
655e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
65657b952d6SSatish Balay   return 0;
65757b952d6SSatish Balay }
65857b952d6SSatish Balay 
6595615d1e5SSatish Balay #undef __FUNC__
6605615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
66157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
66257b952d6SSatish Balay {
66357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
664cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
66557b952d6SSatish Balay   FILE         *fd;
66657b952d6SSatish Balay   ViewerType   vtype;
66757b952d6SSatish Balay 
66857b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
66957b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
67057b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
671639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6724e220ebcSLois Curfman McInnes       MatInfo info;
67357b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
67457b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6754e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
67657b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
67757b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6784e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6794e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6804e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6814e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6824e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6834e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
68457b952d6SSatish Balay       fflush(fd);
68557b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
68657b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
68757b952d6SSatish Balay       return 0;
68857b952d6SSatish Balay     }
689639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
690bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
69157b952d6SSatish Balay       return 0;
69257b952d6SSatish Balay     }
69357b952d6SSatish Balay   }
69457b952d6SSatish Balay 
69557b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
69657b952d6SSatish Balay     Draw       draw;
69757b952d6SSatish Balay     PetscTruth isnull;
69857b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
69957b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
70057b952d6SSatish Balay   }
70157b952d6SSatish Balay 
70257b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
70357b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
70457b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
70557b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
70657b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
70757b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
70857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
70957b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
71057b952d6SSatish Balay     fflush(fd);
71157b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
71257b952d6SSatish Balay   }
71357b952d6SSatish Balay   else {
71457b952d6SSatish Balay     int size = baij->size;
71557b952d6SSatish Balay     rank = baij->rank;
71657b952d6SSatish Balay     if (size == 1) {
71757b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
71857b952d6SSatish Balay     }
71957b952d6SSatish Balay     else {
72057b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
72157b952d6SSatish Balay       Mat         A;
72257b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
72357b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
72457b952d6SSatish Balay       int         mbs=baij->mbs;
72557b952d6SSatish Balay       Scalar      *a;
72657b952d6SSatish Balay 
72757b952d6SSatish Balay       if (!rank) {
728cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
72957b952d6SSatish Balay         CHKERRQ(ierr);
73057b952d6SSatish Balay       }
73157b952d6SSatish Balay       else {
732cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
73357b952d6SSatish Balay         CHKERRQ(ierr);
73457b952d6SSatish Balay       }
73557b952d6SSatish Balay       PLogObjectParent(mat,A);
73657b952d6SSatish Balay 
73757b952d6SSatish Balay       /* copy over the A part */
73857b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
73957b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
74057b952d6SSatish Balay       row = baij->rstart;
74157b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
74257b952d6SSatish Balay 
74357b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
74457b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
74557b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
74657b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
74757b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
74857b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
749cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
750cee3aa6bSSatish Balay             col++; a += bs;
75157b952d6SSatish Balay           }
75257b952d6SSatish Balay         }
75357b952d6SSatish Balay       }
75457b952d6SSatish Balay       /* copy over the B part */
75557b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
75657b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
75757b952d6SSatish Balay       row = baij->rstart*bs;
75857b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
75957b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76057b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76157b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76257b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
76357b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
764cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
765cee3aa6bSSatish Balay             col++; a += bs;
76657b952d6SSatish Balay           }
76757b952d6SSatish Balay         }
76857b952d6SSatish Balay       }
76957b952d6SSatish Balay       PetscFree(rvals);
7706d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7716d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
77257b952d6SSatish Balay       if (!rank) {
77357b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
77457b952d6SSatish Balay       }
77557b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
77657b952d6SSatish Balay     }
77757b952d6SSatish Balay   }
77857b952d6SSatish Balay   return 0;
77957b952d6SSatish Balay }
78057b952d6SSatish Balay 
78157b952d6SSatish Balay 
78257b952d6SSatish Balay 
7835615d1e5SSatish Balay #undef __FUNC__
7845615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
785ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
78657b952d6SSatish Balay {
78757b952d6SSatish Balay   Mat         mat = (Mat) obj;
78857b952d6SSatish Balay   int         ierr;
78957b952d6SSatish Balay   ViewerType  vtype;
79057b952d6SSatish Balay 
79157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
79257b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
79357b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
79457b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
79557b952d6SSatish Balay   }
79657b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
79757b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
79857b952d6SSatish Balay   }
79957b952d6SSatish Balay   return 0;
80057b952d6SSatish Balay }
80157b952d6SSatish Balay 
8025615d1e5SSatish Balay #undef __FUNC__
8035615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
804ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
80579bdfe76SSatish Balay {
80679bdfe76SSatish Balay   Mat         mat = (Mat) obj;
80779bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
80879bdfe76SSatish Balay   int         ierr;
80979bdfe76SSatish Balay 
81079bdfe76SSatish Balay #if defined(PETSC_LOG)
81179bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
81279bdfe76SSatish Balay #endif
81379bdfe76SSatish Balay 
81479bdfe76SSatish Balay   PetscFree(baij->rowners);
81579bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
81679bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
81779bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
81879bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
81979bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
82079bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
82179bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
82230793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
82379bdfe76SSatish Balay   PetscFree(baij);
82490f02eecSBarry Smith   if (mat->mapping) {
82590f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
82690f02eecSBarry Smith   }
82779bdfe76SSatish Balay   PLogObjectDestroy(mat);
82879bdfe76SSatish Balay   PetscHeaderDestroy(mat);
82979bdfe76SSatish Balay   return 0;
83079bdfe76SSatish Balay }
83179bdfe76SSatish Balay 
8325615d1e5SSatish Balay #undef __FUNC__
8335615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
834ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
835cee3aa6bSSatish Balay {
836cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
83747b4a8eaSLois Curfman McInnes   int         ierr, nt;
838cee3aa6bSSatish Balay 
839c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
84047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
841ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
84247b4a8eaSLois Curfman McInnes   }
843c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
84447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
845e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
84647b4a8eaSLois Curfman McInnes   }
84743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
848cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
84943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
850cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
85143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
852cee3aa6bSSatish Balay   return 0;
853cee3aa6bSSatish Balay }
854cee3aa6bSSatish Balay 
8555615d1e5SSatish Balay #undef __FUNC__
8565615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
857ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
858cee3aa6bSSatish Balay {
859cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
860cee3aa6bSSatish Balay   int        ierr;
86143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
862cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
86343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
864cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
865cee3aa6bSSatish Balay   return 0;
866cee3aa6bSSatish Balay }
867cee3aa6bSSatish Balay 
8685615d1e5SSatish Balay #undef __FUNC__
8695615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
870ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
871cee3aa6bSSatish Balay {
872cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
873cee3aa6bSSatish Balay   int        ierr;
874cee3aa6bSSatish Balay 
875cee3aa6bSSatish Balay   /* do nondiagonal part */
876cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
877cee3aa6bSSatish Balay   /* send it on its way */
878537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
879cee3aa6bSSatish Balay   /* do local part */
880cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
881cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
882cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
883cee3aa6bSSatish Balay   /* but is not perhaps always true. */
884639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
885cee3aa6bSSatish Balay   return 0;
886cee3aa6bSSatish Balay }
887cee3aa6bSSatish Balay 
8885615d1e5SSatish Balay #undef __FUNC__
8895615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
890ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
891cee3aa6bSSatish Balay {
892cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
893cee3aa6bSSatish Balay   int        ierr;
894cee3aa6bSSatish Balay 
895cee3aa6bSSatish Balay   /* do nondiagonal part */
896cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
897cee3aa6bSSatish Balay   /* send it on its way */
898537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
899cee3aa6bSSatish Balay   /* do local part */
900cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
901cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
902cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
903cee3aa6bSSatish Balay   /* but is not perhaps always true. */
904537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
905cee3aa6bSSatish Balay   return 0;
906cee3aa6bSSatish Balay }
907cee3aa6bSSatish Balay 
908cee3aa6bSSatish Balay /*
909cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
910cee3aa6bSSatish Balay    diagonal block
911cee3aa6bSSatish Balay */
9125615d1e5SSatish Balay #undef __FUNC__
9135615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
914ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
915cee3aa6bSSatish Balay {
916cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
917cee3aa6bSSatish Balay   if (a->M != a->N)
918e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
919cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
920cee3aa6bSSatish Balay }
921cee3aa6bSSatish Balay 
9225615d1e5SSatish Balay #undef __FUNC__
9235615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
924ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
925cee3aa6bSSatish Balay {
926cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
927cee3aa6bSSatish Balay   int        ierr;
928cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
929cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
930cee3aa6bSSatish Balay   return 0;
931cee3aa6bSSatish Balay }
932026e39d0SSatish Balay 
9335615d1e5SSatish Balay #undef __FUNC__
9345615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
935ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
93657b952d6SSatish Balay {
93757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
93857b952d6SSatish Balay   *m = mat->M; *n = mat->N;
93957b952d6SSatish Balay   return 0;
94057b952d6SSatish Balay }
94157b952d6SSatish Balay 
9425615d1e5SSatish Balay #undef __FUNC__
9435615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
944ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
94557b952d6SSatish Balay {
94657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
94757b952d6SSatish Balay   *m = mat->m; *n = mat->N;
94857b952d6SSatish Balay   return 0;
94957b952d6SSatish Balay }
95057b952d6SSatish Balay 
9515615d1e5SSatish Balay #undef __FUNC__
9525615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
953ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
95457b952d6SSatish Balay {
95557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
95657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
95757b952d6SSatish Balay   return 0;
95857b952d6SSatish Balay }
95957b952d6SSatish Balay 
960acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
961acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
962acdf5bf4SSatish Balay 
9635615d1e5SSatish Balay #undef __FUNC__
9645615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
965acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
966acdf5bf4SSatish Balay {
967acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
968acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
969acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
970d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
971d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
972acdf5bf4SSatish Balay 
973e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
974acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
975acdf5bf4SSatish Balay 
976acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
977acdf5bf4SSatish Balay     /*
978acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
979acdf5bf4SSatish Balay     */
980acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
981bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
982bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
983acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
984acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
985acdf5bf4SSatish Balay     }
986acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
987acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
988acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
989acdf5bf4SSatish Balay   }
990acdf5bf4SSatish Balay 
991acdf5bf4SSatish Balay 
992e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
993d9d09a02SSatish Balay   lrow = row - brstart;
994acdf5bf4SSatish Balay 
995acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
996acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
997acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
998acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
999acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1000acdf5bf4SSatish Balay   nztot = nzA + nzB;
1001acdf5bf4SSatish Balay 
1002acdf5bf4SSatish Balay   cmap  = mat->garray;
1003acdf5bf4SSatish Balay   if (v  || idx) {
1004acdf5bf4SSatish Balay     if (nztot) {
1005acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1006acdf5bf4SSatish Balay       int imark = -1;
1007acdf5bf4SSatish Balay       if (v) {
1008acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1009acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1010d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1011acdf5bf4SSatish Balay           else break;
1012acdf5bf4SSatish Balay         }
1013acdf5bf4SSatish Balay         imark = i;
1014acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1015acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1016acdf5bf4SSatish Balay       }
1017acdf5bf4SSatish Balay       if (idx) {
1018acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1019acdf5bf4SSatish Balay         if (imark > -1) {
1020acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1021bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1022acdf5bf4SSatish Balay           }
1023acdf5bf4SSatish Balay         } else {
1024acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1025d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1026d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1027acdf5bf4SSatish Balay             else break;
1028acdf5bf4SSatish Balay           }
1029acdf5bf4SSatish Balay           imark = i;
1030acdf5bf4SSatish Balay         }
1031d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1032d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1033acdf5bf4SSatish Balay       }
1034acdf5bf4SSatish Balay     }
1035d212a18eSSatish Balay     else {
1036d212a18eSSatish Balay       if (idx) *idx = 0;
1037d212a18eSSatish Balay       if (v)   *v   = 0;
1038d212a18eSSatish Balay     }
1039acdf5bf4SSatish Balay   }
1040acdf5bf4SSatish Balay   *nz = nztot;
1041acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1042acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1043acdf5bf4SSatish Balay   return 0;
1044acdf5bf4SSatish Balay }
1045acdf5bf4SSatish Balay 
10465615d1e5SSatish Balay #undef __FUNC__
10475615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1048acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1049acdf5bf4SSatish Balay {
1050acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1051acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1052e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1053acdf5bf4SSatish Balay   }
1054acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1055acdf5bf4SSatish Balay   return 0;
1056acdf5bf4SSatish Balay }
1057acdf5bf4SSatish Balay 
10585615d1e5SSatish Balay #undef __FUNC__
10595615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1060ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10615a838052SSatish Balay {
10625a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10635a838052SSatish Balay   *bs = baij->bs;
10645a838052SSatish Balay   return 0;
10655a838052SSatish Balay }
10665a838052SSatish Balay 
10675615d1e5SSatish Balay #undef __FUNC__
10685615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1069ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
107058667388SSatish Balay {
107158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
107258667388SSatish Balay   int         ierr;
107358667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
107458667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
107558667388SSatish Balay   return 0;
107658667388SSatish Balay }
10770ac07820SSatish Balay 
10785615d1e5SSatish Balay #undef __FUNC__
10795615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1080ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10810ac07820SSatish Balay {
10824e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10834e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10847d57db60SLois Curfman McInnes   int         ierr;
10857d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10860ac07820SSatish Balay 
10874e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10884e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10894e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10904e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10914e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10924e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10934e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
10944e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10954e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
10960ac07820SSatish Balay   if (flag == MAT_LOCAL) {
10974e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10984e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10994e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11004e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11014e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11020ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1103dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11044e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11054e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11064e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11074e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11084e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1110dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11114e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11124e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11134e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11144e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11154e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11160ac07820SSatish Balay   }
11174e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11184e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11194e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11200ac07820SSatish Balay   return 0;
11210ac07820SSatish Balay }
11220ac07820SSatish Balay 
11235615d1e5SSatish Balay #undef __FUNC__
11245615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1125ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
112658667388SSatish Balay {
112758667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
112858667388SSatish Balay 
112958667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
113058667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11316da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1132c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1133c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1134b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1135b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1136b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1137aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
113858667388SSatish Balay         MatSetOption(a->A,op);
113958667388SSatish Balay         MatSetOption(a->B,op);
1140b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11416da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
114258667388SSatish Balay              op == MAT_SYMMETRIC ||
114358667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
114458667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
114558667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
114658667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
114758667388SSatish Balay     a->roworiented = 0;
114858667388SSatish Balay     MatSetOption(a->A,op);
114958667388SSatish Balay     MatSetOption(a->B,op);
115090f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
115190f02eecSBarry Smith     a->donotstash = 1;
115290f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1153e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
115458667388SSatish Balay   else
1155e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
115658667388SSatish Balay   return 0;
115758667388SSatish Balay }
115858667388SSatish Balay 
11595615d1e5SSatish Balay #undef __FUNC__
11605615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1161ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11620ac07820SSatish Balay {
11630ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11640ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11650ac07820SSatish Balay   Mat        B;
11660ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11670ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11680ac07820SSatish Balay   Scalar     *a;
11690ac07820SSatish Balay 
11700ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1171e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11720ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11730ac07820SSatish Balay   CHKERRQ(ierr);
11740ac07820SSatish Balay 
11750ac07820SSatish Balay   /* copy over the A part */
11760ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11770ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11780ac07820SSatish Balay   row = baij->rstart;
11790ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11800ac07820SSatish Balay 
11810ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11820ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11830ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11840ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11850ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11860ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11870ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11880ac07820SSatish Balay         col++; a += bs;
11890ac07820SSatish Balay       }
11900ac07820SSatish Balay     }
11910ac07820SSatish Balay   }
11920ac07820SSatish Balay   /* copy over the B part */
11930ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
11940ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11950ac07820SSatish Balay   row = baij->rstart*bs;
11960ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11970ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11980ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11990ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12000ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12010ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12020ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12030ac07820SSatish Balay         col++; a += bs;
12040ac07820SSatish Balay       }
12050ac07820SSatish Balay     }
12060ac07820SSatish Balay   }
12070ac07820SSatish Balay   PetscFree(rvals);
12080ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12090ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12100ac07820SSatish Balay 
12110ac07820SSatish Balay   if (matout != PETSC_NULL) {
12120ac07820SSatish Balay     *matout = B;
12130ac07820SSatish Balay   } else {
12140ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12150ac07820SSatish Balay     PetscFree(baij->rowners);
12160ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12170ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12180ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12190ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12200ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12210ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12220ac07820SSatish Balay     PetscFree(baij);
12230ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
12240ac07820SSatish Balay     PetscHeaderDestroy(B);
12250ac07820SSatish Balay   }
12260ac07820SSatish Balay   return 0;
12270ac07820SSatish Balay }
12280e95ebc0SSatish Balay 
12295615d1e5SSatish Balay #undef __FUNC__
12305615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12310e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12320e95ebc0SSatish Balay {
12330e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12340e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12350e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12360e95ebc0SSatish Balay 
12370e95ebc0SSatish Balay   if (ll)  {
12380e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12390e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1240e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12410e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12420e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12430e95ebc0SSatish Balay   }
1244e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12450e95ebc0SSatish Balay   return 0;
12460e95ebc0SSatish Balay }
12470e95ebc0SSatish Balay 
12480ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12490ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12500ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12510ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12520ac07820SSatish Balay    routine.
12530ac07820SSatish Balay */
12545615d1e5SSatish Balay #undef __FUNC__
12555615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1256ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12570ac07820SSatish Balay {
12580ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12590ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12600ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12610ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12620ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12630ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12640ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12650ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12660ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12670ac07820SSatish Balay   IS             istmp;
12680ac07820SSatish Balay 
12690ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12700ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12710ac07820SSatish Balay 
12720ac07820SSatish Balay   /*  first count number of contributors to each processor */
12730ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12740ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12750ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12760ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12770ac07820SSatish Balay     idx = rows[i];
12780ac07820SSatish Balay     found = 0;
12790ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12800ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12810ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12820ac07820SSatish Balay       }
12830ac07820SSatish Balay     }
1284e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12850ac07820SSatish Balay   }
12860ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12870ac07820SSatish Balay 
12880ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12890ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12900ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12910ac07820SSatish Balay   nrecvs = work[rank];
12920ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
12930ac07820SSatish Balay   nmax = work[rank];
12940ac07820SSatish Balay   PetscFree(work);
12950ac07820SSatish Balay 
12960ac07820SSatish Balay   /* post receives:   */
12970ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
12980ac07820SSatish Balay   CHKPTRQ(rvalues);
12990ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13000ac07820SSatish Balay   CHKPTRQ(recv_waits);
13010ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13020ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13030ac07820SSatish Balay   }
13040ac07820SSatish Balay 
13050ac07820SSatish Balay   /* do sends:
13060ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13070ac07820SSatish Balay          the ith processor
13080ac07820SSatish Balay   */
13090ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13100ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13110ac07820SSatish Balay   CHKPTRQ(send_waits);
13120ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13130ac07820SSatish Balay   starts[0] = 0;
13140ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13150ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13160ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13170ac07820SSatish Balay   }
13180ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13190ac07820SSatish Balay 
13200ac07820SSatish Balay   starts[0] = 0;
13210ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13220ac07820SSatish Balay   count = 0;
13230ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13240ac07820SSatish Balay     if (procs[i]) {
13250ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13260ac07820SSatish Balay     }
13270ac07820SSatish Balay   }
13280ac07820SSatish Balay   PetscFree(starts);
13290ac07820SSatish Balay 
13300ac07820SSatish Balay   base = owners[rank]*bs;
13310ac07820SSatish Balay 
13320ac07820SSatish Balay   /*  wait on receives */
13330ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13340ac07820SSatish Balay   source = lens + nrecvs;
13350ac07820SSatish Balay   count  = nrecvs; slen = 0;
13360ac07820SSatish Balay   while (count) {
13370ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13380ac07820SSatish Balay     /* unpack receives into our local space */
13390ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13400ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13410ac07820SSatish Balay     lens[imdex]  = n;
13420ac07820SSatish Balay     slen += n;
13430ac07820SSatish Balay     count--;
13440ac07820SSatish Balay   }
13450ac07820SSatish Balay   PetscFree(recv_waits);
13460ac07820SSatish Balay 
13470ac07820SSatish Balay   /* move the data into the send scatter */
13480ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13490ac07820SSatish Balay   count = 0;
13500ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13510ac07820SSatish Balay     values = rvalues + i*nmax;
13520ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13530ac07820SSatish Balay       lrows[count++] = values[j] - base;
13540ac07820SSatish Balay     }
13550ac07820SSatish Balay   }
13560ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13570ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13580ac07820SSatish Balay 
13590ac07820SSatish Balay   /* actually zap the local rows */
1360537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13610ac07820SSatish Balay   PLogObjectParent(A,istmp);
13620ac07820SSatish Balay   PetscFree(lrows);
13630ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13640ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13650ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13660ac07820SSatish Balay 
13670ac07820SSatish Balay   /* wait on sends */
13680ac07820SSatish Balay   if (nsends) {
13690ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13700ac07820SSatish Balay     CHKPTRQ(send_status);
13710ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13720ac07820SSatish Balay     PetscFree(send_status);
13730ac07820SSatish Balay   }
13740ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13750ac07820SSatish Balay 
13760ac07820SSatish Balay   return 0;
13770ac07820SSatish Balay }
1378ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13795615d1e5SSatish Balay #undef __FUNC__
13805615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1381ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1382ba4ca20aSSatish Balay {
1383ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1384ba4ca20aSSatish Balay 
1385ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1386ba4ca20aSSatish Balay   else return 0;
1387ba4ca20aSSatish Balay }
13880ac07820SSatish Balay 
13895615d1e5SSatish Balay #undef __FUNC__
13905615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1391ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1392bb5a7306SBarry Smith {
1393bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1394bb5a7306SBarry Smith   int         ierr;
1395bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1396bb5a7306SBarry Smith   return 0;
1397bb5a7306SBarry Smith }
1398bb5a7306SBarry Smith 
13990ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14000ac07820SSatish Balay 
140179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
140279bdfe76SSatish Balay static struct _MatOps MatOps = {
1403bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14044c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14054c50302cSBarry Smith   0,0,0,0,
14060ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14070e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
140858667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14094c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14104c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14114c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
141294a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1413d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1414ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1415bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1416ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
141779bdfe76SSatish Balay 
141879bdfe76SSatish Balay 
14195615d1e5SSatish Balay #undef __FUNC__
14205615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
142179bdfe76SSatish Balay /*@C
142279bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
142379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
142479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
142579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
142679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
142779bdfe76SSatish Balay 
142879bdfe76SSatish Balay    Input Parameters:
142979bdfe76SSatish Balay .  comm - MPI communicator
143079bdfe76SSatish Balay .  bs   - size of blockk
143179bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
143292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
143392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
143492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
143592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
143692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
143779bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
143892e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
143979bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
144079bdfe76SSatish Balay            submatrix  (same for all local rows)
144192e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
144292e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
144392e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
144492e8d321SLois Curfman McInnes            it is zero.
144592e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
144679bdfe76SSatish Balay            submatrix (same for all local rows).
144792e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
144892e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
144992e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
145079bdfe76SSatish Balay 
145179bdfe76SSatish Balay    Output Parameter:
145279bdfe76SSatish Balay .  A - the matrix
145379bdfe76SSatish Balay 
145479bdfe76SSatish Balay    Notes:
145579bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
145679bdfe76SSatish Balay    (possibly both).
145779bdfe76SSatish Balay 
145879bdfe76SSatish Balay    Storage Information:
145979bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
146079bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
146179bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
146279bdfe76SSatish Balay    local matrix (a rectangular submatrix).
146379bdfe76SSatish Balay 
146479bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
146579bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
146679bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
146779bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
146879bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
146979bdfe76SSatish Balay 
147079bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
147179bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
147279bdfe76SSatish Balay 
147379bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
147479bdfe76SSatish Balay $         -------------------
147579bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
147679bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
147779bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
147879bdfe76SSatish Balay $         -------------------
147979bdfe76SSatish Balay $
148079bdfe76SSatish Balay 
148179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
148279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
148379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
148457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
148579bdfe76SSatish Balay 
148679bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
148779bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
148879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
148992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
149092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14916da5968aSLois Curfman McInnes    matrices.
149279bdfe76SSatish Balay 
149392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
149479bdfe76SSatish Balay 
149579bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
149679bdfe76SSatish Balay @*/
149779bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
149879bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
149979bdfe76SSatish Balay {
150079bdfe76SSatish Balay   Mat          B;
150179bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15024c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
150379bdfe76SSatish Balay 
1504e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
150579bdfe76SSatish Balay   *A = 0;
150679bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
150779bdfe76SSatish Balay   PLogObjectCreate(B);
150879bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
150979bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
151079bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15114c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15124c50302cSBarry Smith   if (size == 1) {
15134c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15144c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15154c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15164c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15174c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15184c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15194c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15204c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15214c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15224c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15234c50302cSBarry Smith   }
15244c50302cSBarry Smith 
152579bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
152679bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
152790f02eecSBarry Smith   B->mapping    = 0;
152879bdfe76SSatish Balay   B->factor     = 0;
152979bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
153079bdfe76SSatish Balay 
1531e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
153279bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
153379bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
153479bdfe76SSatish Balay 
153579bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1536e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1537e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1538e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1539cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1540cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
154179bdfe76SSatish Balay 
154279bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
154379bdfe76SSatish Balay     work[0] = m; work[1] = n;
154479bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
154579bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
154679bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
154779bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
154879bdfe76SSatish Balay   }
154979bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
155079bdfe76SSatish Balay     Mbs = M/bs;
1551e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
155279bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
155379bdfe76SSatish Balay     m   = mbs*bs;
155479bdfe76SSatish Balay   }
155579bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
155679bdfe76SSatish Balay     Nbs = N/bs;
1557e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
155879bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
155979bdfe76SSatish Balay     n   = nbs*bs;
156079bdfe76SSatish Balay   }
1561e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
156279bdfe76SSatish Balay 
156379bdfe76SSatish Balay   b->m = m; B->m = m;
156479bdfe76SSatish Balay   b->n = n; B->n = n;
156579bdfe76SSatish Balay   b->N = N; B->N = N;
156679bdfe76SSatish Balay   b->M = M; B->M = M;
156779bdfe76SSatish Balay   b->bs  = bs;
156879bdfe76SSatish Balay   b->bs2 = bs*bs;
156979bdfe76SSatish Balay   b->mbs = mbs;
157079bdfe76SSatish Balay   b->nbs = nbs;
157179bdfe76SSatish Balay   b->Mbs = Mbs;
157279bdfe76SSatish Balay   b->Nbs = Nbs;
157379bdfe76SSatish Balay 
157479bdfe76SSatish Balay   /* build local table of row and column ownerships */
157579bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
157679bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15770ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
157879bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
157979bdfe76SSatish Balay   b->rowners[0] = 0;
158079bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
158179bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
158279bdfe76SSatish Balay   }
158379bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
158479bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15854fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15864fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15874fa0d573SSatish Balay 
158879bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
158979bdfe76SSatish Balay   b->cowners[0] = 0;
159079bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
159179bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
159279bdfe76SSatish Balay   }
159379bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
159479bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
15954fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
15964fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
159779bdfe76SSatish Balay 
159879bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
159979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
160079bdfe76SSatish Balay   PLogObjectParent(B,b->A);
160179bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
160279bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
160379bdfe76SSatish Balay   PLogObjectParent(B,b->B);
160479bdfe76SSatish Balay 
160579bdfe76SSatish Balay   /* build cache for off array entries formed */
160679bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
160790f02eecSBarry Smith   b->donotstash  = 0;
160879bdfe76SSatish Balay   b->colmap      = 0;
160979bdfe76SSatish Balay   b->garray      = 0;
161079bdfe76SSatish Balay   b->roworiented = 1;
161179bdfe76SSatish Balay 
161230793edcSSatish Balay   /* stuff used in block assembly */
161330793edcSSatish Balay   b->barray      = 0;
161430793edcSSatish Balay 
161579bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
161679bdfe76SSatish Balay   b->lvec        = 0;
161779bdfe76SSatish Balay   b->Mvctx       = 0;
161879bdfe76SSatish Balay 
161979bdfe76SSatish Balay   /* stuff for MatGetRow() */
162079bdfe76SSatish Balay   b->rowindices   = 0;
162179bdfe76SSatish Balay   b->rowvalues    = 0;
162279bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
162379bdfe76SSatish Balay 
162479bdfe76SSatish Balay   *A = B;
162579bdfe76SSatish Balay   return 0;
162679bdfe76SSatish Balay }
1627026e39d0SSatish Balay 
16285615d1e5SSatish Balay #undef __FUNC__
16295615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16300ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16310ac07820SSatish Balay {
16320ac07820SSatish Balay   Mat         mat;
16330ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16340ac07820SSatish Balay   int         ierr, len=0, flg;
16350ac07820SSatish Balay 
16360ac07820SSatish Balay   *newmat       = 0;
16370ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16380ac07820SSatish Balay   PLogObjectCreate(mat);
16390ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16400ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16410ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16420ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16430ac07820SSatish Balay   mat->factor     = matin->factor;
16440ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16450ac07820SSatish Balay 
16460ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16470ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16480ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16490ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16500ac07820SSatish Balay 
16510ac07820SSatish Balay   a->bs  = oldmat->bs;
16520ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16530ac07820SSatish Balay   a->mbs = oldmat->mbs;
16540ac07820SSatish Balay   a->nbs = oldmat->nbs;
16550ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16560ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16570ac07820SSatish Balay 
16580ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16590ac07820SSatish Balay   a->rend         = oldmat->rend;
16600ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16610ac07820SSatish Balay   a->cend         = oldmat->cend;
16620ac07820SSatish Balay   a->size         = oldmat->size;
16630ac07820SSatish Balay   a->rank         = oldmat->rank;
1664e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16650ac07820SSatish Balay   a->rowvalues    = 0;
16660ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
166730793edcSSatish Balay   a->barray       = 0;
16680ac07820SSatish Balay 
16690ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
16700ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
16710ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16720ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16730ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16740ac07820SSatish Balay   if (oldmat->colmap) {
16750ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16760ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16770ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16780ac07820SSatish Balay   } else a->colmap = 0;
16790ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16800ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16810ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16820ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16830ac07820SSatish Balay   } else a->garray = 0;
16840ac07820SSatish Balay 
16850ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16860ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16870ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16880ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16890ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16900ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16910ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
16920ac07820SSatish Balay   PLogObjectParent(mat,a->B);
16930ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
16940ac07820SSatish Balay   if (flg) {
16950ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
16960ac07820SSatish Balay   }
16970ac07820SSatish Balay   *newmat = mat;
16980ac07820SSatish Balay   return 0;
16990ac07820SSatish Balay }
170057b952d6SSatish Balay 
170157b952d6SSatish Balay #include "sys.h"
170257b952d6SSatish Balay 
17035615d1e5SSatish Balay #undef __FUNC__
17045615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
170557b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
170657b952d6SSatish Balay {
170757b952d6SSatish Balay   Mat          A;
170857b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
170957b952d6SSatish Balay   Scalar       *vals,*buf;
171057b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
171157b952d6SSatish Balay   MPI_Status   status;
1712cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
171357b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
171457b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
171557b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
171657b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
171757b952d6SSatish Balay 
171857b952d6SSatish Balay 
171957b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
172057b952d6SSatish Balay   bs2  = bs*bs;
172157b952d6SSatish Balay 
172257b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
172357b952d6SSatish Balay   if (!rank) {
172457b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
172557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1726e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
172757b952d6SSatish Balay   }
172857b952d6SSatish Balay 
172957b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
173057b952d6SSatish Balay   M = header[1]; N = header[2];
173157b952d6SSatish Balay 
1732e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
173357b952d6SSatish Balay 
173457b952d6SSatish Balay   /*
173557b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
173657b952d6SSatish Balay      divisible by the blocksize
173757b952d6SSatish Balay   */
173857b952d6SSatish Balay   Mbs        = M/bs;
173957b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
174057b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
174157b952d6SSatish Balay   else                  Mbs++;
174257b952d6SSatish Balay   if (extra_rows &&!rank) {
1743b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
174457b952d6SSatish Balay   }
1745537820f0SBarry Smith 
174657b952d6SSatish Balay   /* determine ownership of all rows */
174757b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
174857b952d6SSatish Balay   m   = mbs * bs;
1749cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1750cee3aa6bSSatish Balay   browners = rowners + size + 1;
175157b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
175257b952d6SSatish Balay   rowners[0] = 0;
1753cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1754cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
175557b952d6SSatish Balay   rstart = rowners[rank];
175657b952d6SSatish Balay   rend   = rowners[rank+1];
175757b952d6SSatish Balay 
175857b952d6SSatish Balay   /* distribute row lengths to all processors */
175957b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
176057b952d6SSatish Balay   if (!rank) {
176157b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
176257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
176357b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
176457b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1765cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1766cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
176757b952d6SSatish Balay     PetscFree(sndcounts);
176857b952d6SSatish Balay   }
176957b952d6SSatish Balay   else {
177057b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
177157b952d6SSatish Balay   }
177257b952d6SSatish Balay 
177357b952d6SSatish Balay   if (!rank) {
177457b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
177557b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
177657b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
177757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
177857b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
177957b952d6SSatish Balay         procsnz[i] += rowlengths[j];
178057b952d6SSatish Balay       }
178157b952d6SSatish Balay     }
178257b952d6SSatish Balay     PetscFree(rowlengths);
178357b952d6SSatish Balay 
178457b952d6SSatish Balay     /* determine max buffer needed and allocate it */
178557b952d6SSatish Balay     maxnz = 0;
178657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
178757b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
178857b952d6SSatish Balay     }
178957b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
179057b952d6SSatish Balay 
179157b952d6SSatish Balay     /* read in my part of the matrix column indices  */
179257b952d6SSatish Balay     nz = procsnz[0];
179357b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
179457b952d6SSatish Balay     mycols = ibuf;
1795cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
179657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1797cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1798cee3aa6bSSatish Balay 
179957b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
180057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
180157b952d6SSatish Balay       nz = procsnz[i];
180257b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
180357b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
180457b952d6SSatish Balay     }
180557b952d6SSatish Balay     /* read in the stuff for the last proc */
180657b952d6SSatish Balay     if ( size != 1 ) {
180757b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
180857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
180957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1810cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
181157b952d6SSatish Balay     }
181257b952d6SSatish Balay     PetscFree(cols);
181357b952d6SSatish Balay   }
181457b952d6SSatish Balay   else {
181557b952d6SSatish Balay     /* determine buffer space needed for message */
181657b952d6SSatish Balay     nz = 0;
181757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
181857b952d6SSatish Balay       nz += locrowlens[i];
181957b952d6SSatish Balay     }
182057b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
182157b952d6SSatish Balay     mycols = ibuf;
182257b952d6SSatish Balay     /* receive message of column indices*/
182357b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
182457b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1825e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
182657b952d6SSatish Balay   }
182757b952d6SSatish Balay 
182857b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1829cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1830cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
183157b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1832cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
183357b952d6SSatish Balay   masked1 = mask    + Mbs;
183457b952d6SSatish Balay   masked2 = masked1 + Mbs;
183557b952d6SSatish Balay   rowcount = 0; nzcount = 0;
183657b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
183757b952d6SSatish Balay     dcount  = 0;
183857b952d6SSatish Balay     odcount = 0;
183957b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
184057b952d6SSatish Balay       kmax = locrowlens[rowcount];
184157b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
184257b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
184357b952d6SSatish Balay         if (!mask[tmp]) {
184457b952d6SSatish Balay           mask[tmp] = 1;
184557b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
184657b952d6SSatish Balay           else masked1[dcount++] = tmp;
184757b952d6SSatish Balay         }
184857b952d6SSatish Balay       }
184957b952d6SSatish Balay       rowcount++;
185057b952d6SSatish Balay     }
1851cee3aa6bSSatish Balay 
185257b952d6SSatish Balay     dlens[i]  = dcount;
185357b952d6SSatish Balay     odlens[i] = odcount;
1854cee3aa6bSSatish Balay 
185557b952d6SSatish Balay     /* zero out the mask elements we set */
185657b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
185757b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
185857b952d6SSatish Balay   }
1859cee3aa6bSSatish Balay 
186057b952d6SSatish Balay   /* create our matrix */
1861537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1862537820f0SBarry Smith          CHKERRQ(ierr);
186357b952d6SSatish Balay   A = *newmat;
18646d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
186557b952d6SSatish Balay 
186657b952d6SSatish Balay   if (!rank) {
186757b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
186857b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
186957b952d6SSatish Balay     nz = procsnz[0];
187057b952d6SSatish Balay     vals = buf;
1871cee3aa6bSSatish Balay     mycols = ibuf;
1872cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
187357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1874cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1875537820f0SBarry Smith 
187657b952d6SSatish Balay     /* insert into matrix */
187757b952d6SSatish Balay     jj      = rstart*bs;
187857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
187957b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
188057b952d6SSatish Balay       mycols += locrowlens[i];
188157b952d6SSatish Balay       vals   += locrowlens[i];
188257b952d6SSatish Balay       jj++;
188357b952d6SSatish Balay     }
188457b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
188557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
188657b952d6SSatish Balay       nz = procsnz[i];
188757b952d6SSatish Balay       vals = buf;
188857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
188957b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
189057b952d6SSatish Balay     }
189157b952d6SSatish Balay     /* the last proc */
189257b952d6SSatish Balay     if ( size != 1 ){
189357b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1894cee3aa6bSSatish Balay       vals = buf;
189557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
189657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1897cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
189857b952d6SSatish Balay     }
189957b952d6SSatish Balay     PetscFree(procsnz);
190057b952d6SSatish Balay   }
190157b952d6SSatish Balay   else {
190257b952d6SSatish Balay     /* receive numeric values */
190357b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
190457b952d6SSatish Balay 
190557b952d6SSatish Balay     /* receive message of values*/
190657b952d6SSatish Balay     vals = buf;
1907cee3aa6bSSatish Balay     mycols = ibuf;
190857b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
190957b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1910e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
191157b952d6SSatish Balay 
191257b952d6SSatish Balay     /* insert into matrix */
191357b952d6SSatish Balay     jj      = rstart*bs;
1914cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
191557b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
191657b952d6SSatish Balay       mycols += locrowlens[i];
191757b952d6SSatish Balay       vals   += locrowlens[i];
191857b952d6SSatish Balay       jj++;
191957b952d6SSatish Balay     }
192057b952d6SSatish Balay   }
192157b952d6SSatish Balay   PetscFree(locrowlens);
192257b952d6SSatish Balay   PetscFree(buf);
192357b952d6SSatish Balay   PetscFree(ibuf);
192457b952d6SSatish Balay   PetscFree(rowners);
192557b952d6SSatish Balay   PetscFree(dlens);
1926cee3aa6bSSatish Balay   PetscFree(mask);
19276d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19286d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
192957b952d6SSatish Balay   return 0;
193057b952d6SSatish Balay }
193157b952d6SSatish Balay 
193257b952d6SSatish Balay 
1933