xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a5eb49655b3fdf389f9d65fc4214d6e1c240a941)
1*a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*a5eb4965SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.72 1997/06/03 16:40:50 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
9589280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
9611ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10211ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
10389280ab3SLois Curfman McInnes  \
10480c1aa95SSatish Balay         /* malloc new storage space */ \
10580c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10680c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10780c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10880c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10980c1aa95SSatish Balay  \
11080c1aa95SSatish Balay         /* copy over old data into new slots */ \
11180c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11280c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11380c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11480c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11580c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11680c1aa95SSatish Balay                                                            len*sizeof(int)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11880c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11980c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
12080c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
12180c1aa95SSatish Balay         /* free up old matrix storage */ \
12280c1aa95SSatish Balay         PetscFree(a->a);  \
12380c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12580c1aa95SSatish Balay         a->singlemalloc = 1; \
12680c1aa95SSatish Balay  \
12780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12980c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
13180c1aa95SSatish Balay         a->reallocs++; \
13280c1aa95SSatish Balay         a->nz++; \
13380c1aa95SSatish Balay       } \
13480c1aa95SSatish Balay       N = nrow++ - 1;  \
13580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13680c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13880c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13980c1aa95SSatish Balay       } \
14080c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
14180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
143ac7a638eSSatish Balay       a_noinsert:; \
14480c1aa95SSatish Balay     ailen[brow] = nrow; \
14580c1aa95SSatish Balay }
14657b952d6SSatish Balay 
147ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148ac7a638eSSatish Balay { \
149ac7a638eSSatish Balay  \
150ac7a638eSSatish Balay     brow = row/bs;  \
151ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
153ac7a638eSSatish Balay       bcol = col/bs; \
154ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
155ac7a638eSSatish Balay       low = 0; high = nrow; \
156ac7a638eSSatish Balay       while (high-low > 3) { \
157ac7a638eSSatish Balay         t = (low+high)/2; \
158ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
159ac7a638eSSatish Balay         else              low  = t; \
160ac7a638eSSatish Balay       } \
161ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
162ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
163ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
164ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
166ac7a638eSSatish Balay           else                    *bap  = value;  \
167ac7a638eSSatish Balay           goto b_noinsert; \
168ac7a638eSSatish Balay         } \
169ac7a638eSSatish Balay       } \
17089280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
17111ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
172ac7a638eSSatish Balay       if (nrow >= rmax) { \
173ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17474c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
175ac7a638eSSatish Balay         Scalar *new_a; \
176ac7a638eSSatish Balay  \
17711ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
17889280ab3SLois Curfman McInnes  \
179ac7a638eSSatish Balay         /* malloc new storage space */ \
18074c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
181ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
182ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
183ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
184ac7a638eSSatish Balay  \
185ac7a638eSSatish Balay         /* copy over old data into new slots */ \
186ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18774c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
188ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
189ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
190ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
191ac7a638eSSatish Balay                                                            len*sizeof(int)); \
192ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
193ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
194ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
195ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
196ac7a638eSSatish Balay         /* free up old matrix storage */ \
19774c639caSSatish Balay         PetscFree(b->a);  \
19874c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
20074c639caSSatish Balay         b->singlemalloc = 1; \
201ac7a638eSSatish Balay  \
202ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
20474c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
20574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20674c639caSSatish Balay         b->reallocs++; \
20774c639caSSatish Balay         b->nz++; \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       N = nrow++ - 1;  \
210ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
211ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
212ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
213ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
214ac7a638eSSatish Balay       } \
215ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
216ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
217ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
218ac7a638eSSatish Balay       b_noinsert:; \
219ac7a638eSSatish Balay     bilen[brow] = nrow; \
220ac7a638eSSatish Balay }
221ac7a638eSSatish Balay 
2225615d1e5SSatish Balay #undef __FUNC__
2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
22557b952d6SSatish Balay {
22657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22757b952d6SSatish Balay   Scalar      value;
2284fa0d573SSatish Balay   int         ierr,i,j,row,col;
2294fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2304fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2314fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
23257b952d6SSatish Balay 
233eada6651SSatish Balay   /* Some Variables required in the macro */
23480c1aa95SSatish Balay   Mat         A = baij->A;
23580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
236ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
237ac7a638eSSatish Balay   Scalar      *aa=a->a;
238ac7a638eSSatish Balay 
239ac7a638eSSatish Balay   Mat         B = baij->B;
240ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
241ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
242ac7a638eSSatish Balay   Scalar      *ba=b->a;
243ac7a638eSSatish Balay 
244ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
245ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
246ac7a638eSSatish Balay   Scalar      *ap,*bap;
24780c1aa95SSatish Balay 
24857b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
250e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
251e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
252639f9d9dSBarry Smith #endif
25357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
25457b952d6SSatish Balay       row = im[i] - rstart_orig;
25557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25757b952d6SSatish Balay           col = in[j] - cstart_orig;
25857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
26080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26157b952d6SSatish Balay         }
262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
263e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay         else {
26757b952d6SSatish Balay           if (mat->was_assembled) {
268905e6a2fSBarry Smith             if (!baij->colmap) {
269905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270905e6a2fSBarry Smith             }
271905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
27257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
27357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
27457b952d6SSatish Balay               col =  in[j];
2759bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2769bf004c3SSatish Balay               B = baij->B;
2779bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2789bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2799bf004c3SSatish Balay               ba=b->a;
28057b952d6SSatish Balay             }
28157b952d6SSatish Balay           }
28257b952d6SSatish Balay           else col = in[j];
28357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
284ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
285ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
28657b952d6SSatish Balay         }
28757b952d6SSatish Balay       }
28857b952d6SSatish Balay     }
28957b952d6SSatish Balay     else {
29090f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
29157b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
29257b952d6SSatish Balay       }
29357b952d6SSatish Balay       else {
29490f02eecSBarry Smith         if (!baij->donotstash) {
29557b952d6SSatish Balay           row = im[i];
29657b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
29757b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
29857b952d6SSatish Balay           }
29957b952d6SSatish Balay         }
30057b952d6SSatish Balay       }
30157b952d6SSatish Balay     }
30290f02eecSBarry Smith   }
30357b952d6SSatish Balay   return 0;
30457b952d6SSatish Balay }
30557b952d6SSatish Balay 
306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
307ab26458aSBarry Smith #undef __FUNC__
308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
310ab26458aSBarry Smith {
311ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
313abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
314ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
315ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
316ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
317ab26458aSBarry Smith 
31830793edcSSatish Balay 
31930793edcSSatish Balay   if(!barray) {
32030793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
32130793edcSSatish Balay     baij->barray = barray;
32230793edcSSatish Balay   }
32330793edcSSatish Balay 
324ab26458aSBarry Smith   if (roworiented) {
325ab26458aSBarry Smith     stepval = (n-1)*bs;
326ab26458aSBarry Smith   } else {
327ab26458aSBarry Smith     stepval = (m-1)*bs;
328ab26458aSBarry Smith   }
329ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
330ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
331ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
332ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
333ab26458aSBarry Smith #endif
334ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
335ab26458aSBarry Smith       row = im[i] - rstart;
336ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
33715b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
33815b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
33915b57d14SSatish Balay           barray = v + i*bs2;
34015b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
34115b57d14SSatish Balay           barray = v + j*bs2;
34215b57d14SSatish Balay         } else { /* Here a copy is required */
343ab26458aSBarry Smith           if (roworiented) {
344ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
345ab26458aSBarry Smith           } else {
346ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
347abef11f7SSatish Balay           }
348ab26458aSBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval )
349ab26458aSBarry Smith             for (jj=0; jj<bs; jj++ )
35030793edcSSatish Balay               *barray++  = *value++;
35130793edcSSatish Balay           barray -=bs2;
35215b57d14SSatish Balay         }
353abef11f7SSatish Balay 
354abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
355abef11f7SSatish Balay           col = in[j] - cstart;
35630793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
357ab26458aSBarry Smith         }
358ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
359ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
360ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
361ab26458aSBarry Smith #endif
362ab26458aSBarry Smith         else {
363ab26458aSBarry Smith           if (mat->was_assembled) {
364ab26458aSBarry Smith             if (!baij->colmap) {
365ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
366ab26458aSBarry Smith             }
367*a5eb4965SSatish Balay 
368*a5eb4965SSatish Balay #if defined(PETSC_BOPT_g)
369*a5eb4965SSatish Balay             if ((baij->colmap[in[j]] - 1)%bs) {SETERRQ(1,0,"Incorrect colmap");}
370*a5eb4965SSatish Balay #endif
371*a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
372ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
373ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
374ab26458aSBarry Smith               col =  in[j];
375ab26458aSBarry Smith             }
376ab26458aSBarry Smith           }
377ab26458aSBarry Smith           else col = in[j];
37830793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379ab26458aSBarry Smith         }
380ab26458aSBarry Smith       }
381ab26458aSBarry Smith     }
382ab26458aSBarry Smith     else {
383ab26458aSBarry Smith       if (!baij->donotstash) {
384ab26458aSBarry Smith         if (roworiented ) {
385abef11f7SSatish Balay           row   = im[i]*bs;
386abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
387abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
388abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
389abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
390abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
391abef11f7SSatish Balay               }
392ab26458aSBarry Smith             }
393ab26458aSBarry Smith           }
394ab26458aSBarry Smith         }
395ab26458aSBarry Smith         else {
396ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
397abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
398abef11f7SSatish Balay             col   = in[j]*bs;
399abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
400abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
401abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
402ab26458aSBarry Smith               }
403ab26458aSBarry Smith             }
404ab26458aSBarry Smith           }
405abef11f7SSatish Balay 
406abef11f7SSatish Balay         }
407abef11f7SSatish Balay       }
408ab26458aSBarry Smith     }
409ab26458aSBarry Smith   }
410ab26458aSBarry Smith   return 0;
411ab26458aSBarry Smith }
412ab26458aSBarry Smith 
4135615d1e5SSatish Balay #undef __FUNC__
4145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
415ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
416d6de1c52SSatish Balay {
417d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
418d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
419d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
420d6de1c52SSatish Balay 
421d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
422e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
423e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
424d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
425d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
426d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
427e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
428e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
429d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
430d6de1c52SSatish Balay           col = idxn[j] - bscstart;
431d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
432d6de1c52SSatish Balay         }
433d6de1c52SSatish Balay         else {
434905e6a2fSBarry Smith           if (!baij->colmap) {
435905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
436905e6a2fSBarry Smith           }
437e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
438dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
439d9d09a02SSatish Balay           else {
440dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
441d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
442d6de1c52SSatish Balay           }
443d6de1c52SSatish Balay         }
444d6de1c52SSatish Balay       }
445d9d09a02SSatish Balay     }
446d6de1c52SSatish Balay     else {
447e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
448d6de1c52SSatish Balay     }
449d6de1c52SSatish Balay   }
450d6de1c52SSatish Balay   return 0;
451d6de1c52SSatish Balay }
452d6de1c52SSatish Balay 
4535615d1e5SSatish Balay #undef __FUNC__
4545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
455ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
456d6de1c52SSatish Balay {
457d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
458d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
459acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
460d6de1c52SSatish Balay   double     sum = 0.0;
461d6de1c52SSatish Balay   Scalar     *v;
462d6de1c52SSatish Balay 
463d6de1c52SSatish Balay   if (baij->size == 1) {
464d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
465d6de1c52SSatish Balay   } else {
466d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
467d6de1c52SSatish Balay       v = amat->a;
468d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
469d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
470d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
471d6de1c52SSatish Balay #else
472d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
473d6de1c52SSatish Balay #endif
474d6de1c52SSatish Balay       }
475d6de1c52SSatish Balay       v = bmat->a;
476d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
477d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
478d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
479d6de1c52SSatish Balay #else
480d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
481d6de1c52SSatish Balay #endif
482d6de1c52SSatish Balay       }
483d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
484d6de1c52SSatish Balay       *norm = sqrt(*norm);
485d6de1c52SSatish Balay     }
486acdf5bf4SSatish Balay     else
487e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
488d6de1c52SSatish Balay   }
489d6de1c52SSatish Balay   return 0;
490d6de1c52SSatish Balay }
49157b952d6SSatish Balay 
4925615d1e5SSatish Balay #undef __FUNC__
4935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
494ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
49557b952d6SSatish Balay {
49657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
49757b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
49857b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
49957b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
50057b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
50157b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
50257b952d6SSatish Balay   InsertMode  addv;
50357b952d6SSatish Balay   Scalar      *rvalues,*svalues;
50457b952d6SSatish Balay 
50557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
506e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
50757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
508e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
50957b952d6SSatish Balay   }
510e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
51157b952d6SSatish Balay 
51257b952d6SSatish Balay   /*  first count number of contributors to each processor */
51357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
51457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
51557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
51657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
51757b952d6SSatish Balay     idx = baij->stash.idx[i];
51857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
51957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
52057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
52157b952d6SSatish Balay       }
52257b952d6SSatish Balay     }
52357b952d6SSatish Balay   }
52457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
52557b952d6SSatish Balay 
52657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
52757b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
52857b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
52957b952d6SSatish Balay   nreceives = work[rank];
53057b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
53157b952d6SSatish Balay   nmax = work[rank];
53257b952d6SSatish Balay   PetscFree(work);
53357b952d6SSatish Balay 
53457b952d6SSatish Balay   /* post receives:
53557b952d6SSatish Balay        1) each message will consist of ordered pairs
53657b952d6SSatish Balay      (global index,value) we store the global index as a double
53757b952d6SSatish Balay      to simplify the message passing.
53857b952d6SSatish Balay        2) since we don't know how long each individual message is we
53957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
54057b952d6SSatish Balay      this is a lot of wasted space.
54157b952d6SSatish Balay 
54257b952d6SSatish Balay 
54357b952d6SSatish Balay        This could be done better.
54457b952d6SSatish Balay   */
54557b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
54657b952d6SSatish Balay   CHKPTRQ(rvalues);
54757b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
54857b952d6SSatish Balay   CHKPTRQ(recv_waits);
54957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
55057b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
55157b952d6SSatish Balay               comm,recv_waits+i);
55257b952d6SSatish Balay   }
55357b952d6SSatish Balay 
55457b952d6SSatish Balay   /* do sends:
55557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
55657b952d6SSatish Balay          the ith processor
55757b952d6SSatish Balay   */
55857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
55957b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
56057b952d6SSatish Balay   CHKPTRQ(send_waits);
56157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
56257b952d6SSatish Balay   starts[0] = 0;
56357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
56557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
56657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
56757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
56857b952d6SSatish Balay   }
56957b952d6SSatish Balay   PetscFree(owner);
57057b952d6SSatish Balay   starts[0] = 0;
57157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
57257b952d6SSatish Balay   count = 0;
57357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
57457b952d6SSatish Balay     if (procs[i]) {
57557b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
57657b952d6SSatish Balay                 comm,send_waits+count++);
57757b952d6SSatish Balay     }
57857b952d6SSatish Balay   }
57957b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
58057b952d6SSatish Balay 
58157b952d6SSatish Balay   /* Free cache space */
582d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
58357b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
58457b952d6SSatish Balay 
58557b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
58657b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
58757b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
58857b952d6SSatish Balay   baij->rmax       = nmax;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   return 0;
59157b952d6SSatish Balay }
59257b952d6SSatish Balay 
59357b952d6SSatish Balay 
5945615d1e5SSatish Balay #undef __FUNC__
5955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
596ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
59757b952d6SSatish Balay {
59857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
59957b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
60057b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
60157b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
60257b952d6SSatish Balay   Scalar      *values,val;
603e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
60457b952d6SSatish Balay 
60557b952d6SSatish Balay   /*  wait on receives */
60657b952d6SSatish Balay   while (count) {
60757b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
60857b952d6SSatish Balay     /* unpack receives into our local space */
60957b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
61057b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
61157b952d6SSatish Balay     n = n/3;
61257b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
61357b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
61457b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
61557b952d6SSatish Balay       val = values[3*i+2];
61657b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
61757b952d6SSatish Balay         col -= baij->cstart*bs;
6186fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
61957b952d6SSatish Balay       }
62057b952d6SSatish Balay       else {
62157b952d6SSatish Balay         if (mat->was_assembled) {
622905e6a2fSBarry Smith           if (!baij->colmap) {
623905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
624905e6a2fSBarry Smith           }
625*a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
62657b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
62757b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
62857b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
62957b952d6SSatish Balay           }
63057b952d6SSatish Balay         }
6316fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
63257b952d6SSatish Balay       }
63357b952d6SSatish Balay     }
63457b952d6SSatish Balay     count--;
63557b952d6SSatish Balay   }
63657b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
63757b952d6SSatish Balay 
63857b952d6SSatish Balay   /* wait on sends */
63957b952d6SSatish Balay   if (baij->nsends) {
64057b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
64157b952d6SSatish Balay     CHKPTRQ(send_status);
64257b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
64357b952d6SSatish Balay     PetscFree(send_status);
64457b952d6SSatish Balay   }
64557b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
64657b952d6SSatish Balay 
64757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
64857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
64957b952d6SSatish Balay 
65057b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
65157b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
65257b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
65357b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
65457b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65557b952d6SSatish Balay   }
65657b952d6SSatish Balay 
6576d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
65857b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
65957b952d6SSatish Balay   }
66057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
66157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
66257b952d6SSatish Balay 
66357b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
66457b952d6SSatish Balay   return 0;
66557b952d6SSatish Balay }
66657b952d6SSatish Balay 
6675615d1e5SSatish Balay #undef __FUNC__
6685615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
66957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
67057b952d6SSatish Balay {
67157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
67257b952d6SSatish Balay   int          ierr;
67357b952d6SSatish Balay 
67457b952d6SSatish Balay   if (baij->size == 1) {
67557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
67657b952d6SSatish Balay   }
677e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
67857b952d6SSatish Balay   return 0;
67957b952d6SSatish Balay }
68057b952d6SSatish Balay 
6815615d1e5SSatish Balay #undef __FUNC__
6825615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
68357b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
68457b952d6SSatish Balay {
68557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
686cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
68757b952d6SSatish Balay   FILE         *fd;
68857b952d6SSatish Balay   ViewerType   vtype;
68957b952d6SSatish Balay 
69057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
69157b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
69257b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
693639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6944e220ebcSLois Curfman McInnes       MatInfo info;
69557b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
69657b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6974e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
69857b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
69957b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
7004e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
7014e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
7024e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
7034e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7044e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7054e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
70657b952d6SSatish Balay       fflush(fd);
70757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
70857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
70957b952d6SSatish Balay       return 0;
71057b952d6SSatish Balay     }
711639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
712bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
71357b952d6SSatish Balay       return 0;
71457b952d6SSatish Balay     }
71557b952d6SSatish Balay   }
71657b952d6SSatish Balay 
71757b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
71857b952d6SSatish Balay     Draw       draw;
71957b952d6SSatish Balay     PetscTruth isnull;
72057b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
72157b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
72257b952d6SSatish Balay   }
72357b952d6SSatish Balay 
72457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
72557b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
72657b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
72757b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
72857b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
72957b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
73057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
73157b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
73257b952d6SSatish Balay     fflush(fd);
73357b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
73457b952d6SSatish Balay   }
73557b952d6SSatish Balay   else {
73657b952d6SSatish Balay     int size = baij->size;
73757b952d6SSatish Balay     rank = baij->rank;
73857b952d6SSatish Balay     if (size == 1) {
73957b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
74057b952d6SSatish Balay     }
74157b952d6SSatish Balay     else {
74257b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
74357b952d6SSatish Balay       Mat         A;
74457b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
74557b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
74657b952d6SSatish Balay       int         mbs=baij->mbs;
74757b952d6SSatish Balay       Scalar      *a;
74857b952d6SSatish Balay 
74957b952d6SSatish Balay       if (!rank) {
750cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
75157b952d6SSatish Balay         CHKERRQ(ierr);
75257b952d6SSatish Balay       }
75357b952d6SSatish Balay       else {
754cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
75557b952d6SSatish Balay         CHKERRQ(ierr);
75657b952d6SSatish Balay       }
75757b952d6SSatish Balay       PLogObjectParent(mat,A);
75857b952d6SSatish Balay 
75957b952d6SSatish Balay       /* copy over the A part */
76057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
76157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
76257b952d6SSatish Balay       row = baij->rstart;
76357b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
76457b952d6SSatish Balay 
76557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
76657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76957b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
77057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
771cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
772cee3aa6bSSatish Balay             col++; a += bs;
77357b952d6SSatish Balay           }
77457b952d6SSatish Balay         }
77557b952d6SSatish Balay       }
77657b952d6SSatish Balay       /* copy over the B part */
77757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
77857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77957b952d6SSatish Balay       row = baij->rstart*bs;
78057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
78157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
78257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
78357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
78457b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
78557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
786cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
787cee3aa6bSSatish Balay             col++; a += bs;
78857b952d6SSatish Balay           }
78957b952d6SSatish Balay         }
79057b952d6SSatish Balay       }
79157b952d6SSatish Balay       PetscFree(rvals);
7926d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7936d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
79457b952d6SSatish Balay       if (!rank) {
79557b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
79657b952d6SSatish Balay       }
79757b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
79857b952d6SSatish Balay     }
79957b952d6SSatish Balay   }
80057b952d6SSatish Balay   return 0;
80157b952d6SSatish Balay }
80257b952d6SSatish Balay 
80357b952d6SSatish Balay 
80457b952d6SSatish Balay 
8055615d1e5SSatish Balay #undef __FUNC__
8065615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
807ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
80857b952d6SSatish Balay {
80957b952d6SSatish Balay   Mat         mat = (Mat) obj;
81057b952d6SSatish Balay   int         ierr;
81157b952d6SSatish Balay   ViewerType  vtype;
81257b952d6SSatish Balay 
81357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
81457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
81557b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
81657b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
81757b952d6SSatish Balay   }
81857b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
81957b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
82057b952d6SSatish Balay   }
82157b952d6SSatish Balay   return 0;
82257b952d6SSatish Balay }
82357b952d6SSatish Balay 
8245615d1e5SSatish Balay #undef __FUNC__
8255615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
826ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
82779bdfe76SSatish Balay {
82879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
82979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
83079bdfe76SSatish Balay   int         ierr;
83179bdfe76SSatish Balay 
83279bdfe76SSatish Balay #if defined(PETSC_LOG)
83379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
83479bdfe76SSatish Balay #endif
83579bdfe76SSatish Balay 
83679bdfe76SSatish Balay   PetscFree(baij->rowners);
83779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
83879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
83979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
84079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
84179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
84279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
84379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
84430793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
84579bdfe76SSatish Balay   PetscFree(baij);
84690f02eecSBarry Smith   if (mat->mapping) {
84790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
84890f02eecSBarry Smith   }
84979bdfe76SSatish Balay   PLogObjectDestroy(mat);
85079bdfe76SSatish Balay   PetscHeaderDestroy(mat);
85179bdfe76SSatish Balay   return 0;
85279bdfe76SSatish Balay }
85379bdfe76SSatish Balay 
8545615d1e5SSatish Balay #undef __FUNC__
8555615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
856ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
857cee3aa6bSSatish Balay {
858cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
85947b4a8eaSLois Curfman McInnes   int         ierr, nt;
860cee3aa6bSSatish Balay 
861c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
86247b4a8eaSLois Curfman McInnes   if (nt != a->n) {
863ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
86447b4a8eaSLois Curfman McInnes   }
865c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
86647b4a8eaSLois Curfman McInnes   if (nt != a->m) {
867e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
86847b4a8eaSLois Curfman McInnes   }
86943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
870cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
87143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
872cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
87343a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
874cee3aa6bSSatish Balay   return 0;
875cee3aa6bSSatish Balay }
876cee3aa6bSSatish Balay 
8775615d1e5SSatish Balay #undef __FUNC__
8785615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
879ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
880cee3aa6bSSatish Balay {
881cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
882cee3aa6bSSatish Balay   int        ierr;
88343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
884cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
88543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
886cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
887cee3aa6bSSatish Balay   return 0;
888cee3aa6bSSatish Balay }
889cee3aa6bSSatish Balay 
8905615d1e5SSatish Balay #undef __FUNC__
8915615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
892ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
893cee3aa6bSSatish Balay {
894cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
895cee3aa6bSSatish Balay   int        ierr;
896cee3aa6bSSatish Balay 
897cee3aa6bSSatish Balay   /* do nondiagonal part */
898cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
899cee3aa6bSSatish Balay   /* send it on its way */
900537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
901cee3aa6bSSatish Balay   /* do local part */
902cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
903cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
904cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
905cee3aa6bSSatish Balay   /* but is not perhaps always true. */
906639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
907cee3aa6bSSatish Balay   return 0;
908cee3aa6bSSatish Balay }
909cee3aa6bSSatish Balay 
9105615d1e5SSatish Balay #undef __FUNC__
9115615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
912ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
913cee3aa6bSSatish Balay {
914cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
915cee3aa6bSSatish Balay   int        ierr;
916cee3aa6bSSatish Balay 
917cee3aa6bSSatish Balay   /* do nondiagonal part */
918cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
919cee3aa6bSSatish Balay   /* send it on its way */
920537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
921cee3aa6bSSatish Balay   /* do local part */
922cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
923cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
924cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
925cee3aa6bSSatish Balay   /* but is not perhaps always true. */
926537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
927cee3aa6bSSatish Balay   return 0;
928cee3aa6bSSatish Balay }
929cee3aa6bSSatish Balay 
930cee3aa6bSSatish Balay /*
931cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
932cee3aa6bSSatish Balay    diagonal block
933cee3aa6bSSatish Balay */
9345615d1e5SSatish Balay #undef __FUNC__
9355615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
936ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
937cee3aa6bSSatish Balay {
938cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
939cee3aa6bSSatish Balay   if (a->M != a->N)
940e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
941cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
942cee3aa6bSSatish Balay }
943cee3aa6bSSatish Balay 
9445615d1e5SSatish Balay #undef __FUNC__
9455615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
946ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
947cee3aa6bSSatish Balay {
948cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
949cee3aa6bSSatish Balay   int        ierr;
950cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
951cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
952cee3aa6bSSatish Balay   return 0;
953cee3aa6bSSatish Balay }
954026e39d0SSatish Balay 
9555615d1e5SSatish Balay #undef __FUNC__
9565615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
957ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
95857b952d6SSatish Balay {
95957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96057b952d6SSatish Balay   *m = mat->M; *n = mat->N;
96157b952d6SSatish Balay   return 0;
96257b952d6SSatish Balay }
96357b952d6SSatish Balay 
9645615d1e5SSatish Balay #undef __FUNC__
9655615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
966ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
96757b952d6SSatish Balay {
96857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96957b952d6SSatish Balay   *m = mat->m; *n = mat->N;
97057b952d6SSatish Balay   return 0;
97157b952d6SSatish Balay }
97257b952d6SSatish Balay 
9735615d1e5SSatish Balay #undef __FUNC__
9745615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
975ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
97657b952d6SSatish Balay {
97757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
97857b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
97957b952d6SSatish Balay   return 0;
98057b952d6SSatish Balay }
98157b952d6SSatish Balay 
982acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
983acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
984acdf5bf4SSatish Balay 
9855615d1e5SSatish Balay #undef __FUNC__
9865615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
987acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
988acdf5bf4SSatish Balay {
989acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
990acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
991acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
992d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
993d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
994acdf5bf4SSatish Balay 
995e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
996acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
997acdf5bf4SSatish Balay 
998acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
999acdf5bf4SSatish Balay     /*
1000acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1001acdf5bf4SSatish Balay     */
1002acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1003bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1004bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1005acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1006acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1007acdf5bf4SSatish Balay     }
1008acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1009acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1010acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1011acdf5bf4SSatish Balay   }
1012acdf5bf4SSatish Balay 
1013acdf5bf4SSatish Balay 
1014e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1015d9d09a02SSatish Balay   lrow = row - brstart;
1016acdf5bf4SSatish Balay 
1017acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1018acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1019acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1020acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1021acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1022acdf5bf4SSatish Balay   nztot = nzA + nzB;
1023acdf5bf4SSatish Balay 
1024acdf5bf4SSatish Balay   cmap  = mat->garray;
1025acdf5bf4SSatish Balay   if (v  || idx) {
1026acdf5bf4SSatish Balay     if (nztot) {
1027acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1028acdf5bf4SSatish Balay       int imark = -1;
1029acdf5bf4SSatish Balay       if (v) {
1030acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1031acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1032d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1033acdf5bf4SSatish Balay           else break;
1034acdf5bf4SSatish Balay         }
1035acdf5bf4SSatish Balay         imark = i;
1036acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1037acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1038acdf5bf4SSatish Balay       }
1039acdf5bf4SSatish Balay       if (idx) {
1040acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1041acdf5bf4SSatish Balay         if (imark > -1) {
1042acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1043bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044acdf5bf4SSatish Balay           }
1045acdf5bf4SSatish Balay         } else {
1046acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1047d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1048d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1049acdf5bf4SSatish Balay             else break;
1050acdf5bf4SSatish Balay           }
1051acdf5bf4SSatish Balay           imark = i;
1052acdf5bf4SSatish Balay         }
1053d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1054d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1055acdf5bf4SSatish Balay       }
1056acdf5bf4SSatish Balay     }
1057d212a18eSSatish Balay     else {
1058d212a18eSSatish Balay       if (idx) *idx = 0;
1059d212a18eSSatish Balay       if (v)   *v   = 0;
1060d212a18eSSatish Balay     }
1061acdf5bf4SSatish Balay   }
1062acdf5bf4SSatish Balay   *nz = nztot;
1063acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1064acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1065acdf5bf4SSatish Balay   return 0;
1066acdf5bf4SSatish Balay }
1067acdf5bf4SSatish Balay 
10685615d1e5SSatish Balay #undef __FUNC__
10695615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1070acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1071acdf5bf4SSatish Balay {
1072acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1073acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1074e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1075acdf5bf4SSatish Balay   }
1076acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1077acdf5bf4SSatish Balay   return 0;
1078acdf5bf4SSatish Balay }
1079acdf5bf4SSatish Balay 
10805615d1e5SSatish Balay #undef __FUNC__
10815615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1082ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10835a838052SSatish Balay {
10845a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10855a838052SSatish Balay   *bs = baij->bs;
10865a838052SSatish Balay   return 0;
10875a838052SSatish Balay }
10885a838052SSatish Balay 
10895615d1e5SSatish Balay #undef __FUNC__
10905615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1091ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
109258667388SSatish Balay {
109358667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
109458667388SSatish Balay   int         ierr;
109558667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
109658667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
109758667388SSatish Balay   return 0;
109858667388SSatish Balay }
10990ac07820SSatish Balay 
11005615d1e5SSatish Balay #undef __FUNC__
11015615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1102ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
11030ac07820SSatish Balay {
11044e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11054e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11067d57db60SLois Curfman McInnes   int         ierr;
11077d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11080ac07820SSatish Balay 
11094e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11104e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11114e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11124e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11134e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11144e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11154e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11164e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11174e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11180ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11194e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11204e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11214e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11224e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11234e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11240ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1125dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11264e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11274e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11284e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11294e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11304e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11310ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1132dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11334e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11344e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11354e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11364e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11374e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11380ac07820SSatish Balay   }
11394e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11404e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11414e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11420ac07820SSatish Balay   return 0;
11430ac07820SSatish Balay }
11440ac07820SSatish Balay 
11455615d1e5SSatish Balay #undef __FUNC__
11465615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1147ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
114858667388SSatish Balay {
114958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
115058667388SSatish Balay 
115158667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
115258667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11536da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1154c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
115596854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1156c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1157b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1158b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1159b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1160aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
116158667388SSatish Balay         MatSetOption(a->A,op);
116258667388SSatish Balay         MatSetOption(a->B,op);
1163b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11646da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
116558667388SSatish Balay              op == MAT_SYMMETRIC ||
116658667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
116758667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
116858667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
116958667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
117058667388SSatish Balay     a->roworiented = 0;
117158667388SSatish Balay     MatSetOption(a->A,op);
117258667388SSatish Balay     MatSetOption(a->B,op);
11732b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
117490f02eecSBarry Smith     a->donotstash = 1;
117590f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1176e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
117758667388SSatish Balay   else
1178e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
117958667388SSatish Balay   return 0;
118058667388SSatish Balay }
118158667388SSatish Balay 
11825615d1e5SSatish Balay #undef __FUNC__
11835615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1184ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11850ac07820SSatish Balay {
11860ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11870ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11880ac07820SSatish Balay   Mat        B;
11890ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11900ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11910ac07820SSatish Balay   Scalar     *a;
11920ac07820SSatish Balay 
11930ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1194e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11950ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11960ac07820SSatish Balay   CHKERRQ(ierr);
11970ac07820SSatish Balay 
11980ac07820SSatish Balay   /* copy over the A part */
11990ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
12000ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12010ac07820SSatish Balay   row = baij->rstart;
12020ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
12030ac07820SSatish Balay 
12040ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12050ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12060ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12070ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12080ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12090ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12100ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12110ac07820SSatish Balay         col++; a += bs;
12120ac07820SSatish Balay       }
12130ac07820SSatish Balay     }
12140ac07820SSatish Balay   }
12150ac07820SSatish Balay   /* copy over the B part */
12160ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12170ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12180ac07820SSatish Balay   row = baij->rstart*bs;
12190ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12200ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12210ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12220ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12230ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12240ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12250ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12260ac07820SSatish Balay         col++; a += bs;
12270ac07820SSatish Balay       }
12280ac07820SSatish Balay     }
12290ac07820SSatish Balay   }
12300ac07820SSatish Balay   PetscFree(rvals);
12310ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12320ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12330ac07820SSatish Balay 
12340ac07820SSatish Balay   if (matout != PETSC_NULL) {
12350ac07820SSatish Balay     *matout = B;
12360ac07820SSatish Balay   } else {
12370ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12380ac07820SSatish Balay     PetscFree(baij->rowners);
12390ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12400ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12410ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12420ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12430ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12440ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12450ac07820SSatish Balay     PetscFree(baij);
1246f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12470ac07820SSatish Balay     PetscHeaderDestroy(B);
12480ac07820SSatish Balay   }
12490ac07820SSatish Balay   return 0;
12500ac07820SSatish Balay }
12510e95ebc0SSatish Balay 
12525615d1e5SSatish Balay #undef __FUNC__
12535615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12540e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12550e95ebc0SSatish Balay {
12560e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12570e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12580e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12590e95ebc0SSatish Balay 
12600e95ebc0SSatish Balay   if (ll)  {
12610e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12620e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1263e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12640e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12650e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12660e95ebc0SSatish Balay   }
1267e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12680e95ebc0SSatish Balay   return 0;
12690e95ebc0SSatish Balay }
12700e95ebc0SSatish Balay 
12710ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12720ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12730ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12740ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12750ac07820SSatish Balay    routine.
12760ac07820SSatish Balay */
12775615d1e5SSatish Balay #undef __FUNC__
12785615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1279ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12800ac07820SSatish Balay {
12810ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12820ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12830ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12840ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12850ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12860ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12870ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12880ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12890ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12900ac07820SSatish Balay   IS             istmp;
12910ac07820SSatish Balay 
12920ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12930ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12940ac07820SSatish Balay 
12950ac07820SSatish Balay   /*  first count number of contributors to each processor */
12960ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12970ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12980ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12990ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13000ac07820SSatish Balay     idx = rows[i];
13010ac07820SSatish Balay     found = 0;
13020ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
13030ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13040ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13050ac07820SSatish Balay       }
13060ac07820SSatish Balay     }
1307e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13080ac07820SSatish Balay   }
13090ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13100ac07820SSatish Balay 
13110ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13120ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13130ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13140ac07820SSatish Balay   nrecvs = work[rank];
13150ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13160ac07820SSatish Balay   nmax = work[rank];
13170ac07820SSatish Balay   PetscFree(work);
13180ac07820SSatish Balay 
13190ac07820SSatish Balay   /* post receives:   */
13200ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13210ac07820SSatish Balay   CHKPTRQ(rvalues);
13220ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13230ac07820SSatish Balay   CHKPTRQ(recv_waits);
13240ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13250ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13260ac07820SSatish Balay   }
13270ac07820SSatish Balay 
13280ac07820SSatish Balay   /* do sends:
13290ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13300ac07820SSatish Balay          the ith processor
13310ac07820SSatish Balay   */
13320ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13330ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13340ac07820SSatish Balay   CHKPTRQ(send_waits);
13350ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13360ac07820SSatish Balay   starts[0] = 0;
13370ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13380ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13390ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13400ac07820SSatish Balay   }
13410ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13420ac07820SSatish Balay 
13430ac07820SSatish Balay   starts[0] = 0;
13440ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13450ac07820SSatish Balay   count = 0;
13460ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13470ac07820SSatish Balay     if (procs[i]) {
13480ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13490ac07820SSatish Balay     }
13500ac07820SSatish Balay   }
13510ac07820SSatish Balay   PetscFree(starts);
13520ac07820SSatish Balay 
13530ac07820SSatish Balay   base = owners[rank]*bs;
13540ac07820SSatish Balay 
13550ac07820SSatish Balay   /*  wait on receives */
13560ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13570ac07820SSatish Balay   source = lens + nrecvs;
13580ac07820SSatish Balay   count  = nrecvs; slen = 0;
13590ac07820SSatish Balay   while (count) {
13600ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13610ac07820SSatish Balay     /* unpack receives into our local space */
13620ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13630ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13640ac07820SSatish Balay     lens[imdex]  = n;
13650ac07820SSatish Balay     slen += n;
13660ac07820SSatish Balay     count--;
13670ac07820SSatish Balay   }
13680ac07820SSatish Balay   PetscFree(recv_waits);
13690ac07820SSatish Balay 
13700ac07820SSatish Balay   /* move the data into the send scatter */
13710ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13720ac07820SSatish Balay   count = 0;
13730ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13740ac07820SSatish Balay     values = rvalues + i*nmax;
13750ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13760ac07820SSatish Balay       lrows[count++] = values[j] - base;
13770ac07820SSatish Balay     }
13780ac07820SSatish Balay   }
13790ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13800ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13810ac07820SSatish Balay 
13820ac07820SSatish Balay   /* actually zap the local rows */
1383029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13840ac07820SSatish Balay   PLogObjectParent(A,istmp);
13850ac07820SSatish Balay   PetscFree(lrows);
13860ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13870ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13880ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13890ac07820SSatish Balay 
13900ac07820SSatish Balay   /* wait on sends */
13910ac07820SSatish Balay   if (nsends) {
13920ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13930ac07820SSatish Balay     CHKPTRQ(send_status);
13940ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13950ac07820SSatish Balay     PetscFree(send_status);
13960ac07820SSatish Balay   }
13970ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13980ac07820SSatish Balay 
13990ac07820SSatish Balay   return 0;
14000ac07820SSatish Balay }
1401ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
14025615d1e5SSatish Balay #undef __FUNC__
14035615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1404ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1405ba4ca20aSSatish Balay {
1406ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1407ba4ca20aSSatish Balay 
1408ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1409ba4ca20aSSatish Balay   else return 0;
1410ba4ca20aSSatish Balay }
14110ac07820SSatish Balay 
14125615d1e5SSatish Balay #undef __FUNC__
14135615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1414ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1415bb5a7306SBarry Smith {
1416bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1417bb5a7306SBarry Smith   int         ierr;
1418bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1419bb5a7306SBarry Smith   return 0;
1420bb5a7306SBarry Smith }
1421bb5a7306SBarry Smith 
14220ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14230ac07820SSatish Balay 
142479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
142579bdfe76SSatish Balay static struct _MatOps MatOps = {
1426bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14274c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14284c50302cSBarry Smith   0,0,0,0,
14290ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14300e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
143158667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14324c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14334c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14344c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
143594a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1436d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1437ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1438bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1439ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
144079bdfe76SSatish Balay 
144179bdfe76SSatish Balay 
14425615d1e5SSatish Balay #undef __FUNC__
14435615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
144479bdfe76SSatish Balay /*@C
144579bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
144679bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
144779bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
144879bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
144979bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
145079bdfe76SSatish Balay 
145179bdfe76SSatish Balay    Input Parameters:
145279bdfe76SSatish Balay .  comm - MPI communicator
145379bdfe76SSatish Balay .  bs   - size of blockk
145479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
145592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
145792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
145892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
146079bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
146192e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
146279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
146379bdfe76SSatish Balay            submatrix  (same for all local rows)
146492e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
146592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
146692e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
146792e8d321SLois Curfman McInnes            it is zero.
146892e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
146979bdfe76SSatish Balay            submatrix (same for all local rows).
147092e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
147192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
147292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
147379bdfe76SSatish Balay 
147479bdfe76SSatish Balay    Output Parameter:
147579bdfe76SSatish Balay .  A - the matrix
147679bdfe76SSatish Balay 
147779bdfe76SSatish Balay    Notes:
147879bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
147979bdfe76SSatish Balay    (possibly both).
148079bdfe76SSatish Balay 
148179bdfe76SSatish Balay    Storage Information:
148279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
148379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
148479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
148579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
148679bdfe76SSatish Balay 
148779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
148879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
148979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
149079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
149179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
149279bdfe76SSatish Balay 
149379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
149479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
149579bdfe76SSatish Balay 
149679bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
149779bdfe76SSatish Balay $         -------------------
149879bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
149979bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
150079bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
150179bdfe76SSatish Balay $         -------------------
150279bdfe76SSatish Balay $
150379bdfe76SSatish Balay 
150479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
150579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
150679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
150757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
150879bdfe76SSatish Balay 
150979bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
151079bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
151179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
151292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
151392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15146da5968aSLois Curfman McInnes    matrices.
151579bdfe76SSatish Balay 
151692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
151779bdfe76SSatish Balay 
151879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
151979bdfe76SSatish Balay @*/
152079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
152179bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
152279bdfe76SSatish Balay {
152379bdfe76SSatish Balay   Mat          B;
152479bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15254c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
152679bdfe76SSatish Balay 
1527e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
152879bdfe76SSatish Balay   *A = 0;
1529f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
153079bdfe76SSatish Balay   PLogObjectCreate(B);
153179bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
153279bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
153379bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15344c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15354c50302cSBarry Smith   if (size == 1) {
15364c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15374c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15384c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15394c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15404c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15414c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15424c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15434c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15444c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15454c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15464c50302cSBarry Smith   }
15474c50302cSBarry Smith 
154879bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
154979bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
155090f02eecSBarry Smith   B->mapping    = 0;
155179bdfe76SSatish Balay   B->factor     = 0;
155279bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
155379bdfe76SSatish Balay 
1554e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
155579bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
155679bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
155779bdfe76SSatish Balay 
155879bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1559e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1560e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1561e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1562cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1563cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
156479bdfe76SSatish Balay 
156579bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
156679bdfe76SSatish Balay     work[0] = m; work[1] = n;
156779bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
156879bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
156979bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
157079bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
157179bdfe76SSatish Balay   }
157279bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
157379bdfe76SSatish Balay     Mbs = M/bs;
1574e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
157579bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
157679bdfe76SSatish Balay     m   = mbs*bs;
157779bdfe76SSatish Balay   }
157879bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
157979bdfe76SSatish Balay     Nbs = N/bs;
1580e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
158179bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
158279bdfe76SSatish Balay     n   = nbs*bs;
158379bdfe76SSatish Balay   }
1584e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
158579bdfe76SSatish Balay 
158679bdfe76SSatish Balay   b->m = m; B->m = m;
158779bdfe76SSatish Balay   b->n = n; B->n = n;
158879bdfe76SSatish Balay   b->N = N; B->N = N;
158979bdfe76SSatish Balay   b->M = M; B->M = M;
159079bdfe76SSatish Balay   b->bs  = bs;
159179bdfe76SSatish Balay   b->bs2 = bs*bs;
159279bdfe76SSatish Balay   b->mbs = mbs;
159379bdfe76SSatish Balay   b->nbs = nbs;
159479bdfe76SSatish Balay   b->Mbs = Mbs;
159579bdfe76SSatish Balay   b->Nbs = Nbs;
159679bdfe76SSatish Balay 
159779bdfe76SSatish Balay   /* build local table of row and column ownerships */
159879bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1599f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16000ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
160179bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
160279bdfe76SSatish Balay   b->rowners[0] = 0;
160379bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
160479bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
160579bdfe76SSatish Balay   }
160679bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
160779bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16084fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16094fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16104fa0d573SSatish Balay 
161179bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
161279bdfe76SSatish Balay   b->cowners[0] = 0;
161379bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
161479bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
161579bdfe76SSatish Balay   }
161679bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
161779bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16184fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16194fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
162079bdfe76SSatish Balay 
162179bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1622029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
162379bdfe76SSatish Balay   PLogObjectParent(B,b->A);
162479bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1625029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
162679bdfe76SSatish Balay   PLogObjectParent(B,b->B);
162779bdfe76SSatish Balay 
162879bdfe76SSatish Balay   /* build cache for off array entries formed */
162979bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
163090f02eecSBarry Smith   b->donotstash  = 0;
163179bdfe76SSatish Balay   b->colmap      = 0;
163279bdfe76SSatish Balay   b->garray      = 0;
163379bdfe76SSatish Balay   b->roworiented = 1;
163479bdfe76SSatish Balay 
163530793edcSSatish Balay   /* stuff used in block assembly */
163630793edcSSatish Balay   b->barray      = 0;
163730793edcSSatish Balay 
163879bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
163979bdfe76SSatish Balay   b->lvec        = 0;
164079bdfe76SSatish Balay   b->Mvctx       = 0;
164179bdfe76SSatish Balay 
164279bdfe76SSatish Balay   /* stuff for MatGetRow() */
164379bdfe76SSatish Balay   b->rowindices   = 0;
164479bdfe76SSatish Balay   b->rowvalues    = 0;
164579bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
164679bdfe76SSatish Balay 
164779bdfe76SSatish Balay   *A = B;
164879bdfe76SSatish Balay   return 0;
164979bdfe76SSatish Balay }
1650026e39d0SSatish Balay 
16515615d1e5SSatish Balay #undef __FUNC__
16525615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16530ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16540ac07820SSatish Balay {
16550ac07820SSatish Balay   Mat         mat;
16560ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16570ac07820SSatish Balay   int         ierr, len=0, flg;
16580ac07820SSatish Balay 
16590ac07820SSatish Balay   *newmat       = 0;
1660f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16610ac07820SSatish Balay   PLogObjectCreate(mat);
16620ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16630ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16640ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16650ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16660ac07820SSatish Balay   mat->factor     = matin->factor;
16670ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16680ac07820SSatish Balay 
16690ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16700ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16710ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16720ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16730ac07820SSatish Balay 
16740ac07820SSatish Balay   a->bs  = oldmat->bs;
16750ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16760ac07820SSatish Balay   a->mbs = oldmat->mbs;
16770ac07820SSatish Balay   a->nbs = oldmat->nbs;
16780ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16790ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16800ac07820SSatish Balay 
16810ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16820ac07820SSatish Balay   a->rend         = oldmat->rend;
16830ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16840ac07820SSatish Balay   a->cend         = oldmat->cend;
16850ac07820SSatish Balay   a->size         = oldmat->size;
16860ac07820SSatish Balay   a->rank         = oldmat->rank;
1687e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16880ac07820SSatish Balay   a->rowvalues    = 0;
16890ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
169030793edcSSatish Balay   a->barray       = 0;
16910ac07820SSatish Balay 
16920ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1693f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16940ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16950ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16960ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16970ac07820SSatish Balay   if (oldmat->colmap) {
16980ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16990ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
17000ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
17010ac07820SSatish Balay   } else a->colmap = 0;
17020ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
17030ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17040ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17050ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17060ac07820SSatish Balay   } else a->garray = 0;
17070ac07820SSatish Balay 
17080ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17090ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17100ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17110ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17120ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17130ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17140ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17150ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17160ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17170ac07820SSatish Balay   if (flg) {
17180ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17190ac07820SSatish Balay   }
17200ac07820SSatish Balay   *newmat = mat;
17210ac07820SSatish Balay   return 0;
17220ac07820SSatish Balay }
172357b952d6SSatish Balay 
172457b952d6SSatish Balay #include "sys.h"
172557b952d6SSatish Balay 
17265615d1e5SSatish Balay #undef __FUNC__
17275615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
172857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
172957b952d6SSatish Balay {
173057b952d6SSatish Balay   Mat          A;
173157b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
173257b952d6SSatish Balay   Scalar       *vals,*buf;
173357b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
173457b952d6SSatish Balay   MPI_Status   status;
1735cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
173657b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
173757b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
173857b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
173957b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
174057b952d6SSatish Balay 
174157b952d6SSatish Balay 
174257b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
174357b952d6SSatish Balay   bs2  = bs*bs;
174457b952d6SSatish Balay 
174557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
174657b952d6SSatish Balay   if (!rank) {
174757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
174857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1749e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
175057b952d6SSatish Balay   }
175157b952d6SSatish Balay 
175257b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
175357b952d6SSatish Balay   M = header[1]; N = header[2];
175457b952d6SSatish Balay 
1755e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
175657b952d6SSatish Balay 
175757b952d6SSatish Balay   /*
175857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
175957b952d6SSatish Balay      divisible by the blocksize
176057b952d6SSatish Balay   */
176157b952d6SSatish Balay   Mbs        = M/bs;
176257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
176357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
176457b952d6SSatish Balay   else                  Mbs++;
176557b952d6SSatish Balay   if (extra_rows &&!rank) {
1766b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
176757b952d6SSatish Balay   }
1768537820f0SBarry Smith 
176957b952d6SSatish Balay   /* determine ownership of all rows */
177057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
177157b952d6SSatish Balay   m   = mbs * bs;
1772cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1773cee3aa6bSSatish Balay   browners = rowners + size + 1;
177457b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
177557b952d6SSatish Balay   rowners[0] = 0;
1776cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1777cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
177857b952d6SSatish Balay   rstart = rowners[rank];
177957b952d6SSatish Balay   rend   = rowners[rank+1];
178057b952d6SSatish Balay 
178157b952d6SSatish Balay   /* distribute row lengths to all processors */
178257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
178357b952d6SSatish Balay   if (!rank) {
178457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
178557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
178657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
178757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1788cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1789cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
179057b952d6SSatish Balay     PetscFree(sndcounts);
179157b952d6SSatish Balay   }
179257b952d6SSatish Balay   else {
179357b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
179457b952d6SSatish Balay   }
179557b952d6SSatish Balay 
179657b952d6SSatish Balay   if (!rank) {
179757b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
179857b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
179957b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
180057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
180157b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
180257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
180357b952d6SSatish Balay       }
180457b952d6SSatish Balay     }
180557b952d6SSatish Balay     PetscFree(rowlengths);
180657b952d6SSatish Balay 
180757b952d6SSatish Balay     /* determine max buffer needed and allocate it */
180857b952d6SSatish Balay     maxnz = 0;
180957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
181057b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
181157b952d6SSatish Balay     }
181257b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
181357b952d6SSatish Balay 
181457b952d6SSatish Balay     /* read in my part of the matrix column indices  */
181557b952d6SSatish Balay     nz = procsnz[0];
181657b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
181757b952d6SSatish Balay     mycols = ibuf;
1818cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
181957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1820cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1821cee3aa6bSSatish Balay 
182257b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
182357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
182457b952d6SSatish Balay       nz = procsnz[i];
182557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
182657b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
182757b952d6SSatish Balay     }
182857b952d6SSatish Balay     /* read in the stuff for the last proc */
182957b952d6SSatish Balay     if ( size != 1 ) {
183057b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
183157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
183257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1833cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
183457b952d6SSatish Balay     }
183557b952d6SSatish Balay     PetscFree(cols);
183657b952d6SSatish Balay   }
183757b952d6SSatish Balay   else {
183857b952d6SSatish Balay     /* determine buffer space needed for message */
183957b952d6SSatish Balay     nz = 0;
184057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
184157b952d6SSatish Balay       nz += locrowlens[i];
184257b952d6SSatish Balay     }
184357b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
184457b952d6SSatish Balay     mycols = ibuf;
184557b952d6SSatish Balay     /* receive message of column indices*/
184657b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
184757b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1848e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
184957b952d6SSatish Balay   }
185057b952d6SSatish Balay 
185157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1852cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1853cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
185457b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1855cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
185657b952d6SSatish Balay   masked1 = mask    + Mbs;
185757b952d6SSatish Balay   masked2 = masked1 + Mbs;
185857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
185957b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
186057b952d6SSatish Balay     dcount  = 0;
186157b952d6SSatish Balay     odcount = 0;
186257b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
186357b952d6SSatish Balay       kmax = locrowlens[rowcount];
186457b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
186557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
186657b952d6SSatish Balay         if (!mask[tmp]) {
186757b952d6SSatish Balay           mask[tmp] = 1;
186857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
186957b952d6SSatish Balay           else masked1[dcount++] = tmp;
187057b952d6SSatish Balay         }
187157b952d6SSatish Balay       }
187257b952d6SSatish Balay       rowcount++;
187357b952d6SSatish Balay     }
1874cee3aa6bSSatish Balay 
187557b952d6SSatish Balay     dlens[i]  = dcount;
187657b952d6SSatish Balay     odlens[i] = odcount;
1877cee3aa6bSSatish Balay 
187857b952d6SSatish Balay     /* zero out the mask elements we set */
187957b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
188057b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
188157b952d6SSatish Balay   }
1882cee3aa6bSSatish Balay 
188357b952d6SSatish Balay   /* create our matrix */
1884537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1885537820f0SBarry Smith          CHKERRQ(ierr);
188657b952d6SSatish Balay   A = *newmat;
18876d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
188857b952d6SSatish Balay 
188957b952d6SSatish Balay   if (!rank) {
189057b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
189157b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
189257b952d6SSatish Balay     nz = procsnz[0];
189357b952d6SSatish Balay     vals = buf;
1894cee3aa6bSSatish Balay     mycols = ibuf;
1895cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
189657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1897cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1898537820f0SBarry Smith 
189957b952d6SSatish Balay     /* insert into matrix */
190057b952d6SSatish Balay     jj      = rstart*bs;
190157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
190257b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
190357b952d6SSatish Balay       mycols += locrowlens[i];
190457b952d6SSatish Balay       vals   += locrowlens[i];
190557b952d6SSatish Balay       jj++;
190657b952d6SSatish Balay     }
190757b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
190857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
190957b952d6SSatish Balay       nz = procsnz[i];
191057b952d6SSatish Balay       vals = buf;
191157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
191257b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
191357b952d6SSatish Balay     }
191457b952d6SSatish Balay     /* the last proc */
191557b952d6SSatish Balay     if ( size != 1 ){
191657b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1917cee3aa6bSSatish Balay       vals = buf;
191857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
191957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1920cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
192157b952d6SSatish Balay     }
192257b952d6SSatish Balay     PetscFree(procsnz);
192357b952d6SSatish Balay   }
192457b952d6SSatish Balay   else {
192557b952d6SSatish Balay     /* receive numeric values */
192657b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
192757b952d6SSatish Balay 
192857b952d6SSatish Balay     /* receive message of values*/
192957b952d6SSatish Balay     vals = buf;
1930cee3aa6bSSatish Balay     mycols = ibuf;
193157b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
193257b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1933e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
193457b952d6SSatish Balay 
193557b952d6SSatish Balay     /* insert into matrix */
193657b952d6SSatish Balay     jj      = rstart*bs;
1937cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
193857b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
193957b952d6SSatish Balay       mycols += locrowlens[i];
194057b952d6SSatish Balay       vals   += locrowlens[i];
194157b952d6SSatish Balay       jj++;
194257b952d6SSatish Balay     }
194357b952d6SSatish Balay   }
194457b952d6SSatish Balay   PetscFree(locrowlens);
194557b952d6SSatish Balay   PetscFree(buf);
194657b952d6SSatish Balay   PetscFree(ibuf);
194757b952d6SSatish Balay   PetscFree(rowners);
194857b952d6SSatish Balay   PetscFree(dlens);
1949cee3aa6bSSatish Balay   PetscFree(mask);
19506d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19516d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
195257b952d6SSatish Balay   return 0;
195357b952d6SSatish Balay }
195457b952d6SSatish Balay 
195557b952d6SSatish Balay 
1956