xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f09e8eb94a771781a812a8d81a9ca3d36ec35eba)
179bdfe76SSatish Balay #ifndef lint
2*f09e8eb9SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.70 1997/05/09 22:32:36 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
77ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
92ac7a638eSSatish Balay           goto a_noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
9589280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
9611ebbc71SLois Curfman McInnes       else if (a->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10211ebbc71SLois Curfman McInnes         if (a->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
10389280ab3SLois Curfman McInnes  \
10480c1aa95SSatish Balay         /* malloc new storage space */ \
10580c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10680c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10780c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10880c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10980c1aa95SSatish Balay  \
11080c1aa95SSatish Balay         /* copy over old data into new slots */ \
11180c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11280c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11380c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11480c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11580c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11680c1aa95SSatish Balay                                                            len*sizeof(int)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11880c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11980c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
12080c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
12180c1aa95SSatish Balay         /* free up old matrix storage */ \
12280c1aa95SSatish Balay         PetscFree(a->a);  \
12380c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12480c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12580c1aa95SSatish Balay         a->singlemalloc = 1; \
12680c1aa95SSatish Balay  \
12780c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
12980c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
13080c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
13180c1aa95SSatish Balay         a->reallocs++; \
13280c1aa95SSatish Balay         a->nz++; \
13380c1aa95SSatish Balay       } \
13480c1aa95SSatish Balay       N = nrow++ - 1;  \
13580c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13680c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13780c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13880c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13980c1aa95SSatish Balay       } \
14080c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
14180c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14280c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
143ac7a638eSSatish Balay       a_noinsert:; \
14480c1aa95SSatish Balay     ailen[brow] = nrow; \
14580c1aa95SSatish Balay }
14657b952d6SSatish Balay 
147ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148ac7a638eSSatish Balay { \
149ac7a638eSSatish Balay  \
150ac7a638eSSatish Balay     brow = row/bs;  \
151ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
153ac7a638eSSatish Balay       bcol = col/bs; \
154ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
155ac7a638eSSatish Balay       low = 0; high = nrow; \
156ac7a638eSSatish Balay       while (high-low > 3) { \
157ac7a638eSSatish Balay         t = (low+high)/2; \
158ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
159ac7a638eSSatish Balay         else              low  = t; \
160ac7a638eSSatish Balay       } \
161ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
162ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
163ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
164ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
166ac7a638eSSatish Balay           else                    *bap  = value;  \
167ac7a638eSSatish Balay           goto b_noinsert; \
168ac7a638eSSatish Balay         } \
169ac7a638eSSatish Balay       } \
17089280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
17111ebbc71SLois Curfman McInnes       else if (b->nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
172ac7a638eSSatish Balay       if (nrow >= rmax) { \
173ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
17474c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
175ac7a638eSSatish Balay         Scalar *new_a; \
176ac7a638eSSatish Balay  \
17711ebbc71SLois Curfman McInnes         if (b->nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); \
17889280ab3SLois Curfman McInnes  \
179ac7a638eSSatish Balay         /* malloc new storage space */ \
18074c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
181ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
182ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
183ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
184ac7a638eSSatish Balay  \
185ac7a638eSSatish Balay         /* copy over old data into new slots */ \
186ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
18774c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
188ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
189ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
190ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
191ac7a638eSSatish Balay                                                            len*sizeof(int)); \
192ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
193ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
194ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
195ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
196ac7a638eSSatish Balay         /* free up old matrix storage */ \
19774c639caSSatish Balay         PetscFree(b->a);  \
19874c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
19974c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
20074c639caSSatish Balay         b->singlemalloc = 1; \
201ac7a638eSSatish Balay  \
202ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
20474c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
20574c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
20674c639caSSatish Balay         b->reallocs++; \
20774c639caSSatish Balay         b->nz++; \
208ac7a638eSSatish Balay       } \
209ac7a638eSSatish Balay       N = nrow++ - 1;  \
210ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
211ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
212ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
213ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
214ac7a638eSSatish Balay       } \
215ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
216ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
217ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
218ac7a638eSSatish Balay       b_noinsert:; \
219ac7a638eSSatish Balay     bilen[brow] = nrow; \
220ac7a638eSSatish Balay }
221ac7a638eSSatish Balay 
2225615d1e5SSatish Balay #undef __FUNC__
2235615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
224ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
22557b952d6SSatish Balay {
22657b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
22757b952d6SSatish Balay   Scalar      value;
2284fa0d573SSatish Balay   int         ierr,i,j,row,col;
2294fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2304fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2314fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
23257b952d6SSatish Balay 
233eada6651SSatish Balay   /* Some Variables required in the macro */
23480c1aa95SSatish Balay   Mat         A = baij->A;
23580c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
236ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
237ac7a638eSSatish Balay   Scalar      *aa=a->a;
238ac7a638eSSatish Balay 
239ac7a638eSSatish Balay   Mat         B = baij->B;
240ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
241ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
242ac7a638eSSatish Balay   Scalar      *ba=b->a;
243ac7a638eSSatish Balay 
244ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
245ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
246ac7a638eSSatish Balay   Scalar      *ap,*bap;
24780c1aa95SSatish Balay 
24857b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
249639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
250e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
251e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
252639f9d9dSBarry Smith #endif
25357b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
25457b952d6SSatish Balay       row = im[i] - rstart_orig;
25557b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
25657b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
25757b952d6SSatish Balay           col = in[j] - cstart_orig;
25857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
26080c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26157b952d6SSatish Balay         }
262639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
263e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
265639f9d9dSBarry Smith #endif
26657b952d6SSatish Balay         else {
26757b952d6SSatish Balay           if (mat->was_assembled) {
268905e6a2fSBarry Smith             if (!baij->colmap) {
269905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270905e6a2fSBarry Smith             }
271905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
27257b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
27357b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
27457b952d6SSatish Balay               col =  in[j];
2759bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2769bf004c3SSatish Balay               B = baij->B;
2779bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2789bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2799bf004c3SSatish Balay               ba=b->a;
28057b952d6SSatish Balay             }
28157b952d6SSatish Balay           }
28257b952d6SSatish Balay           else col = in[j];
28357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
284ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
285ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
28657b952d6SSatish Balay         }
28757b952d6SSatish Balay       }
28857b952d6SSatish Balay     }
28957b952d6SSatish Balay     else {
29090f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
29157b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
29257b952d6SSatish Balay       }
29357b952d6SSatish Balay       else {
29490f02eecSBarry Smith         if (!baij->donotstash) {
29557b952d6SSatish Balay           row = im[i];
29657b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
29757b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
29857b952d6SSatish Balay           }
29957b952d6SSatish Balay         }
30057b952d6SSatish Balay       }
30157b952d6SSatish Balay     }
30290f02eecSBarry Smith   }
30357b952d6SSatish Balay   return 0;
30457b952d6SSatish Balay }
30557b952d6SSatish Balay 
306ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
307ab26458aSBarry Smith #undef __FUNC__
308ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
309ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
310ab26458aSBarry Smith {
311ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
31230793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
313abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
314ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
315ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
316ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
317ab26458aSBarry Smith 
31830793edcSSatish Balay 
31930793edcSSatish Balay   if(!barray) {
32030793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
32130793edcSSatish Balay     baij->barray = barray;
32230793edcSSatish Balay   }
32330793edcSSatish Balay 
324ab26458aSBarry Smith   if (roworiented) {
325ab26458aSBarry Smith     stepval = (n-1)*bs;
326ab26458aSBarry Smith   } else {
327ab26458aSBarry Smith     stepval = (m-1)*bs;
328ab26458aSBarry Smith   }
329ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
330ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
331ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
332ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
333ab26458aSBarry Smith #endif
334ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
335ab26458aSBarry Smith       row = im[i] - rstart;
336ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
33715b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
33815b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
33915b57d14SSatish Balay           barray = v + i*bs2;
34015b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
34115b57d14SSatish Balay           barray = v + j*bs2;
34215b57d14SSatish Balay         } else { /* Here a copy is required */
343ab26458aSBarry Smith           if (roworiented) {
344ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
345ab26458aSBarry Smith           } else {
346ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
347abef11f7SSatish Balay           }
348ab26458aSBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval )
349ab26458aSBarry Smith             for (jj=0; jj<bs; jj++ )
35030793edcSSatish Balay               *barray++  = *value++;
35130793edcSSatish Balay           barray -=bs2;
35215b57d14SSatish Balay         }
353abef11f7SSatish Balay 
354abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
355abef11f7SSatish Balay           col = in[j] - cstart;
35630793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
357ab26458aSBarry Smith         }
358ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
359ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
360ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
361ab26458aSBarry Smith #endif
362ab26458aSBarry Smith         else {
363ab26458aSBarry Smith           if (mat->was_assembled) {
364ab26458aSBarry Smith             if (!baij->colmap) {
365ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
366ab26458aSBarry Smith             }
367ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
368ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
369ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
370ab26458aSBarry Smith               col =  in[j];
371ab26458aSBarry Smith             }
372ab26458aSBarry Smith           }
373ab26458aSBarry Smith           else col = in[j];
37430793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
375ab26458aSBarry Smith         }
376ab26458aSBarry Smith       }
377ab26458aSBarry Smith     }
378ab26458aSBarry Smith     else {
379ab26458aSBarry Smith       if (!baij->donotstash) {
380ab26458aSBarry Smith         if (roworiented ) {
381abef11f7SSatish Balay           row   = im[i]*bs;
382abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
383abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
384abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
385abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
386abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
387abef11f7SSatish Balay               }
388ab26458aSBarry Smith             }
389ab26458aSBarry Smith           }
390ab26458aSBarry Smith         }
391ab26458aSBarry Smith         else {
392ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
393abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
394abef11f7SSatish Balay             col   = in[j]*bs;
395abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
396abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
397abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
398ab26458aSBarry Smith               }
399ab26458aSBarry Smith             }
400ab26458aSBarry Smith           }
401abef11f7SSatish Balay 
402abef11f7SSatish Balay         }
403abef11f7SSatish Balay       }
404ab26458aSBarry Smith     }
405ab26458aSBarry Smith   }
406ab26458aSBarry Smith   return 0;
407ab26458aSBarry Smith }
408ab26458aSBarry Smith 
4095615d1e5SSatish Balay #undef __FUNC__
4105615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
411ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
412d6de1c52SSatish Balay {
413d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
414d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
415d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
416d6de1c52SSatish Balay 
417d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
418e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
419e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
420d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
421d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
422d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
423e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
424e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
425d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
426d6de1c52SSatish Balay           col = idxn[j] - bscstart;
427d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
428d6de1c52SSatish Balay         }
429d6de1c52SSatish Balay         else {
430905e6a2fSBarry Smith           if (!baij->colmap) {
431905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
432905e6a2fSBarry Smith           }
433e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
434dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
435d9d09a02SSatish Balay           else {
436dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
437d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
438d6de1c52SSatish Balay           }
439d6de1c52SSatish Balay         }
440d6de1c52SSatish Balay       }
441d9d09a02SSatish Balay     }
442d6de1c52SSatish Balay     else {
443e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
444d6de1c52SSatish Balay     }
445d6de1c52SSatish Balay   }
446d6de1c52SSatish Balay   return 0;
447d6de1c52SSatish Balay }
448d6de1c52SSatish Balay 
4495615d1e5SSatish Balay #undef __FUNC__
4505615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
451ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
452d6de1c52SSatish Balay {
453d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
454d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
455acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
456d6de1c52SSatish Balay   double     sum = 0.0;
457d6de1c52SSatish Balay   Scalar     *v;
458d6de1c52SSatish Balay 
459d6de1c52SSatish Balay   if (baij->size == 1) {
460d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
461d6de1c52SSatish Balay   } else {
462d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
463d6de1c52SSatish Balay       v = amat->a;
464d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
465d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
466d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
467d6de1c52SSatish Balay #else
468d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
469d6de1c52SSatish Balay #endif
470d6de1c52SSatish Balay       }
471d6de1c52SSatish Balay       v = bmat->a;
472d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
473d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
474d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
475d6de1c52SSatish Balay #else
476d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
477d6de1c52SSatish Balay #endif
478d6de1c52SSatish Balay       }
479d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
480d6de1c52SSatish Balay       *norm = sqrt(*norm);
481d6de1c52SSatish Balay     }
482acdf5bf4SSatish Balay     else
483e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
484d6de1c52SSatish Balay   }
485d6de1c52SSatish Balay   return 0;
486d6de1c52SSatish Balay }
48757b952d6SSatish Balay 
4885615d1e5SSatish Balay #undef __FUNC__
4895615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
490ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
49157b952d6SSatish Balay {
49257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
49357b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
49457b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
49557b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
49657b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
49757b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
49857b952d6SSatish Balay   InsertMode  addv;
49957b952d6SSatish Balay   Scalar      *rvalues,*svalues;
50057b952d6SSatish Balay 
50157b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
502e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
50357b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
504e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
50557b952d6SSatish Balay   }
506e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
50757b952d6SSatish Balay 
50857b952d6SSatish Balay   /*  first count number of contributors to each processor */
50957b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
51057b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
51157b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
51257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
51357b952d6SSatish Balay     idx = baij->stash.idx[i];
51457b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
51557b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
51657b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
51757b952d6SSatish Balay       }
51857b952d6SSatish Balay     }
51957b952d6SSatish Balay   }
52057b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
52157b952d6SSatish Balay 
52257b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
52357b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
52457b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
52557b952d6SSatish Balay   nreceives = work[rank];
52657b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
52757b952d6SSatish Balay   nmax = work[rank];
52857b952d6SSatish Balay   PetscFree(work);
52957b952d6SSatish Balay 
53057b952d6SSatish Balay   /* post receives:
53157b952d6SSatish Balay        1) each message will consist of ordered pairs
53257b952d6SSatish Balay      (global index,value) we store the global index as a double
53357b952d6SSatish Balay      to simplify the message passing.
53457b952d6SSatish Balay        2) since we don't know how long each individual message is we
53557b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
53657b952d6SSatish Balay      this is a lot of wasted space.
53757b952d6SSatish Balay 
53857b952d6SSatish Balay 
53957b952d6SSatish Balay        This could be done better.
54057b952d6SSatish Balay   */
54157b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
54257b952d6SSatish Balay   CHKPTRQ(rvalues);
54357b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
54457b952d6SSatish Balay   CHKPTRQ(recv_waits);
54557b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
54657b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
54757b952d6SSatish Balay               comm,recv_waits+i);
54857b952d6SSatish Balay   }
54957b952d6SSatish Balay 
55057b952d6SSatish Balay   /* do sends:
55157b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
55257b952d6SSatish Balay          the ith processor
55357b952d6SSatish Balay   */
55457b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
55557b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
55657b952d6SSatish Balay   CHKPTRQ(send_waits);
55757b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
55857b952d6SSatish Balay   starts[0] = 0;
55957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
56157b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
56257b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
56357b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
56457b952d6SSatish Balay   }
56557b952d6SSatish Balay   PetscFree(owner);
56657b952d6SSatish Balay   starts[0] = 0;
56757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
56857b952d6SSatish Balay   count = 0;
56957b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
57057b952d6SSatish Balay     if (procs[i]) {
57157b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
57257b952d6SSatish Balay                 comm,send_waits+count++);
57357b952d6SSatish Balay     }
57457b952d6SSatish Balay   }
57557b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
57657b952d6SSatish Balay 
57757b952d6SSatish Balay   /* Free cache space */
578d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
57957b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
58057b952d6SSatish Balay 
58157b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
58257b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
58357b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
58457b952d6SSatish Balay   baij->rmax       = nmax;
58557b952d6SSatish Balay 
58657b952d6SSatish Balay   return 0;
58757b952d6SSatish Balay }
58857b952d6SSatish Balay 
58957b952d6SSatish Balay 
5905615d1e5SSatish Balay #undef __FUNC__
5915615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
592ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
59357b952d6SSatish Balay {
59457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
59557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
59657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
59757b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
59857b952d6SSatish Balay   Scalar      *values,val;
599e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
60057b952d6SSatish Balay 
60157b952d6SSatish Balay   /*  wait on receives */
60257b952d6SSatish Balay   while (count) {
60357b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
60457b952d6SSatish Balay     /* unpack receives into our local space */
60557b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
60657b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
60757b952d6SSatish Balay     n = n/3;
60857b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
60957b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
61057b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
61157b952d6SSatish Balay       val = values[3*i+2];
61257b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
61357b952d6SSatish Balay         col -= baij->cstart*bs;
61457b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
61557b952d6SSatish Balay       }
61657b952d6SSatish Balay       else {
61757b952d6SSatish Balay         if (mat->was_assembled) {
618905e6a2fSBarry Smith           if (!baij->colmap) {
619905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
620905e6a2fSBarry Smith           }
621905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
62257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
62357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
62457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
62557b952d6SSatish Balay           }
62657b952d6SSatish Balay         }
62757b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
62857b952d6SSatish Balay       }
62957b952d6SSatish Balay     }
63057b952d6SSatish Balay     count--;
63157b952d6SSatish Balay   }
63257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
63357b952d6SSatish Balay 
63457b952d6SSatish Balay   /* wait on sends */
63557b952d6SSatish Balay   if (baij->nsends) {
63657b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
63757b952d6SSatish Balay     CHKPTRQ(send_status);
63857b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
63957b952d6SSatish Balay     PetscFree(send_status);
64057b952d6SSatish Balay   }
64157b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
64257b952d6SSatish Balay 
64357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
64457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
64557b952d6SSatish Balay 
64657b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
64757b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
64857b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
64957b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
65057b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
65157b952d6SSatish Balay   }
65257b952d6SSatish Balay 
6536d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
65457b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
65557b952d6SSatish Balay   }
65657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
65757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
65857b952d6SSatish Balay 
65957b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
66057b952d6SSatish Balay   return 0;
66157b952d6SSatish Balay }
66257b952d6SSatish Balay 
6635615d1e5SSatish Balay #undef __FUNC__
6645615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
66557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
66657b952d6SSatish Balay {
66757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
66857b952d6SSatish Balay   int          ierr;
66957b952d6SSatish Balay 
67057b952d6SSatish Balay   if (baij->size == 1) {
67157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
67257b952d6SSatish Balay   }
673e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
67457b952d6SSatish Balay   return 0;
67557b952d6SSatish Balay }
67657b952d6SSatish Balay 
6775615d1e5SSatish Balay #undef __FUNC__
6785615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
67957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
68057b952d6SSatish Balay {
68157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
682cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
68357b952d6SSatish Balay   FILE         *fd;
68457b952d6SSatish Balay   ViewerType   vtype;
68557b952d6SSatish Balay 
68657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
68757b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
68857b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
689639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6904e220ebcSLois Curfman McInnes       MatInfo info;
69157b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
69257b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6934e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
69457b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
69557b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6964e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6974e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6984e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6994e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
7004e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
7014e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
70257b952d6SSatish Balay       fflush(fd);
70357b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
70457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
70557b952d6SSatish Balay       return 0;
70657b952d6SSatish Balay     }
707639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
708bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
70957b952d6SSatish Balay       return 0;
71057b952d6SSatish Balay     }
71157b952d6SSatish Balay   }
71257b952d6SSatish Balay 
71357b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
71457b952d6SSatish Balay     Draw       draw;
71557b952d6SSatish Balay     PetscTruth isnull;
71657b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
71757b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
71857b952d6SSatish Balay   }
71957b952d6SSatish Balay 
72057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
72157b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
72257b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
72357b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
72457b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
72557b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
72657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
72757b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
72857b952d6SSatish Balay     fflush(fd);
72957b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
73057b952d6SSatish Balay   }
73157b952d6SSatish Balay   else {
73257b952d6SSatish Balay     int size = baij->size;
73357b952d6SSatish Balay     rank = baij->rank;
73457b952d6SSatish Balay     if (size == 1) {
73557b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
73657b952d6SSatish Balay     }
73757b952d6SSatish Balay     else {
73857b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
73957b952d6SSatish Balay       Mat         A;
74057b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
74157b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
74257b952d6SSatish Balay       int         mbs=baij->mbs;
74357b952d6SSatish Balay       Scalar      *a;
74457b952d6SSatish Balay 
74557b952d6SSatish Balay       if (!rank) {
746cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
74757b952d6SSatish Balay         CHKERRQ(ierr);
74857b952d6SSatish Balay       }
74957b952d6SSatish Balay       else {
750cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
75157b952d6SSatish Balay         CHKERRQ(ierr);
75257b952d6SSatish Balay       }
75357b952d6SSatish Balay       PLogObjectParent(mat,A);
75457b952d6SSatish Balay 
75557b952d6SSatish Balay       /* copy over the A part */
75657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
75757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
75857b952d6SSatish Balay       row = baij->rstart;
75957b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
76057b952d6SSatish Balay 
76157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
76257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
76357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
76457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
76557b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
76657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
767cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768cee3aa6bSSatish Balay             col++; a += bs;
76957b952d6SSatish Balay           }
77057b952d6SSatish Balay         }
77157b952d6SSatish Balay       }
77257b952d6SSatish Balay       /* copy over the B part */
77357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
77457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
77557b952d6SSatish Balay       row = baij->rstart*bs;
77657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
77757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
77857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
77957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
78057b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
78157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
782cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
783cee3aa6bSSatish Balay             col++; a += bs;
78457b952d6SSatish Balay           }
78557b952d6SSatish Balay         }
78657b952d6SSatish Balay       }
78757b952d6SSatish Balay       PetscFree(rvals);
7886d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7896d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
79057b952d6SSatish Balay       if (!rank) {
79157b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
79257b952d6SSatish Balay       }
79357b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
79457b952d6SSatish Balay     }
79557b952d6SSatish Balay   }
79657b952d6SSatish Balay   return 0;
79757b952d6SSatish Balay }
79857b952d6SSatish Balay 
79957b952d6SSatish Balay 
80057b952d6SSatish Balay 
8015615d1e5SSatish Balay #undef __FUNC__
8025615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
803ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
80457b952d6SSatish Balay {
80557b952d6SSatish Balay   Mat         mat = (Mat) obj;
80657b952d6SSatish Balay   int         ierr;
80757b952d6SSatish Balay   ViewerType  vtype;
80857b952d6SSatish Balay 
80957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
81057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
81157b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
81257b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
81357b952d6SSatish Balay   }
81457b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
81557b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
81657b952d6SSatish Balay   }
81757b952d6SSatish Balay   return 0;
81857b952d6SSatish Balay }
81957b952d6SSatish Balay 
8205615d1e5SSatish Balay #undef __FUNC__
8215615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
822ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
82379bdfe76SSatish Balay {
82479bdfe76SSatish Balay   Mat         mat = (Mat) obj;
82579bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
82679bdfe76SSatish Balay   int         ierr;
82779bdfe76SSatish Balay 
82879bdfe76SSatish Balay #if defined(PETSC_LOG)
82979bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
83079bdfe76SSatish Balay #endif
83179bdfe76SSatish Balay 
83279bdfe76SSatish Balay   PetscFree(baij->rowners);
83379bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
83479bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
83579bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
83679bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
83779bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
83879bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
83979bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
84030793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
84179bdfe76SSatish Balay   PetscFree(baij);
84290f02eecSBarry Smith   if (mat->mapping) {
84390f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
84490f02eecSBarry Smith   }
84579bdfe76SSatish Balay   PLogObjectDestroy(mat);
84679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
84779bdfe76SSatish Balay   return 0;
84879bdfe76SSatish Balay }
84979bdfe76SSatish Balay 
8505615d1e5SSatish Balay #undef __FUNC__
8515615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
852ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
853cee3aa6bSSatish Balay {
854cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
85547b4a8eaSLois Curfman McInnes   int         ierr, nt;
856cee3aa6bSSatish Balay 
857c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
85847b4a8eaSLois Curfman McInnes   if (nt != a->n) {
859ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
86047b4a8eaSLois Curfman McInnes   }
861c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
86247b4a8eaSLois Curfman McInnes   if (nt != a->m) {
863e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
86447b4a8eaSLois Curfman McInnes   }
86543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
866cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
86743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
868cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
86943a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
870cee3aa6bSSatish Balay   return 0;
871cee3aa6bSSatish Balay }
872cee3aa6bSSatish Balay 
8735615d1e5SSatish Balay #undef __FUNC__
8745615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
875ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
876cee3aa6bSSatish Balay {
877cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
878cee3aa6bSSatish Balay   int        ierr;
87943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
880cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
88143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
882cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
883cee3aa6bSSatish Balay   return 0;
884cee3aa6bSSatish Balay }
885cee3aa6bSSatish Balay 
8865615d1e5SSatish Balay #undef __FUNC__
8875615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
888ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
889cee3aa6bSSatish Balay {
890cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
891cee3aa6bSSatish Balay   int        ierr;
892cee3aa6bSSatish Balay 
893cee3aa6bSSatish Balay   /* do nondiagonal part */
894cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
895cee3aa6bSSatish Balay   /* send it on its way */
896537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
897cee3aa6bSSatish Balay   /* do local part */
898cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
899cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
900cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
901cee3aa6bSSatish Balay   /* but is not perhaps always true. */
902639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
903cee3aa6bSSatish Balay   return 0;
904cee3aa6bSSatish Balay }
905cee3aa6bSSatish Balay 
9065615d1e5SSatish Balay #undef __FUNC__
9075615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
908ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
909cee3aa6bSSatish Balay {
910cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
911cee3aa6bSSatish Balay   int        ierr;
912cee3aa6bSSatish Balay 
913cee3aa6bSSatish Balay   /* do nondiagonal part */
914cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
915cee3aa6bSSatish Balay   /* send it on its way */
916537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
917cee3aa6bSSatish Balay   /* do local part */
918cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
919cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
920cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
921cee3aa6bSSatish Balay   /* but is not perhaps always true. */
922537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
923cee3aa6bSSatish Balay   return 0;
924cee3aa6bSSatish Balay }
925cee3aa6bSSatish Balay 
926cee3aa6bSSatish Balay /*
927cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
928cee3aa6bSSatish Balay    diagonal block
929cee3aa6bSSatish Balay */
9305615d1e5SSatish Balay #undef __FUNC__
9315615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
932ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
933cee3aa6bSSatish Balay {
934cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
935cee3aa6bSSatish Balay   if (a->M != a->N)
936e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
937cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
938cee3aa6bSSatish Balay }
939cee3aa6bSSatish Balay 
9405615d1e5SSatish Balay #undef __FUNC__
9415615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
942ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
943cee3aa6bSSatish Balay {
944cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
945cee3aa6bSSatish Balay   int        ierr;
946cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
947cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
948cee3aa6bSSatish Balay   return 0;
949cee3aa6bSSatish Balay }
950026e39d0SSatish Balay 
9515615d1e5SSatish Balay #undef __FUNC__
9525615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
953ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
95457b952d6SSatish Balay {
95557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
95657b952d6SSatish Balay   *m = mat->M; *n = mat->N;
95757b952d6SSatish Balay   return 0;
95857b952d6SSatish Balay }
95957b952d6SSatish Balay 
9605615d1e5SSatish Balay #undef __FUNC__
9615615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
962ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
96357b952d6SSatish Balay {
96457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
96557b952d6SSatish Balay   *m = mat->m; *n = mat->N;
96657b952d6SSatish Balay   return 0;
96757b952d6SSatish Balay }
96857b952d6SSatish Balay 
9695615d1e5SSatish Balay #undef __FUNC__
9705615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
971ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
97257b952d6SSatish Balay {
97357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
97457b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
97557b952d6SSatish Balay   return 0;
97657b952d6SSatish Balay }
97757b952d6SSatish Balay 
978acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
979acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
980acdf5bf4SSatish Balay 
9815615d1e5SSatish Balay #undef __FUNC__
9825615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
983acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
984acdf5bf4SSatish Balay {
985acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
986acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
987acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
988d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
989d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
990acdf5bf4SSatish Balay 
991e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
992acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
993acdf5bf4SSatish Balay 
994acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
995acdf5bf4SSatish Balay     /*
996acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
997acdf5bf4SSatish Balay     */
998acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
999bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1000bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1001acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1002acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1003acdf5bf4SSatish Balay     }
1004acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1005acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1006acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1007acdf5bf4SSatish Balay   }
1008acdf5bf4SSatish Balay 
1009acdf5bf4SSatish Balay 
1010e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
1011d9d09a02SSatish Balay   lrow = row - brstart;
1012acdf5bf4SSatish Balay 
1013acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1014acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1015acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1016acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1017acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1018acdf5bf4SSatish Balay   nztot = nzA + nzB;
1019acdf5bf4SSatish Balay 
1020acdf5bf4SSatish Balay   cmap  = mat->garray;
1021acdf5bf4SSatish Balay   if (v  || idx) {
1022acdf5bf4SSatish Balay     if (nztot) {
1023acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1024acdf5bf4SSatish Balay       int imark = -1;
1025acdf5bf4SSatish Balay       if (v) {
1026acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1027acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1028d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1029acdf5bf4SSatish Balay           else break;
1030acdf5bf4SSatish Balay         }
1031acdf5bf4SSatish Balay         imark = i;
1032acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1033acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1034acdf5bf4SSatish Balay       }
1035acdf5bf4SSatish Balay       if (idx) {
1036acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1037acdf5bf4SSatish Balay         if (imark > -1) {
1038acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1039bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1040acdf5bf4SSatish Balay           }
1041acdf5bf4SSatish Balay         } else {
1042acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1043d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1044d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1045acdf5bf4SSatish Balay             else break;
1046acdf5bf4SSatish Balay           }
1047acdf5bf4SSatish Balay           imark = i;
1048acdf5bf4SSatish Balay         }
1049d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1050d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1051acdf5bf4SSatish Balay       }
1052acdf5bf4SSatish Balay     }
1053d212a18eSSatish Balay     else {
1054d212a18eSSatish Balay       if (idx) *idx = 0;
1055d212a18eSSatish Balay       if (v)   *v   = 0;
1056d212a18eSSatish Balay     }
1057acdf5bf4SSatish Balay   }
1058acdf5bf4SSatish Balay   *nz = nztot;
1059acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1060acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1061acdf5bf4SSatish Balay   return 0;
1062acdf5bf4SSatish Balay }
1063acdf5bf4SSatish Balay 
10645615d1e5SSatish Balay #undef __FUNC__
10655615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1066acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1067acdf5bf4SSatish Balay {
1068acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1069acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1070e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
1071acdf5bf4SSatish Balay   }
1072acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
1073acdf5bf4SSatish Balay   return 0;
1074acdf5bf4SSatish Balay }
1075acdf5bf4SSatish Balay 
10765615d1e5SSatish Balay #undef __FUNC__
10775615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1078ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
10795a838052SSatish Balay {
10805a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10815a838052SSatish Balay   *bs = baij->bs;
10825a838052SSatish Balay   return 0;
10835a838052SSatish Balay }
10845a838052SSatish Balay 
10855615d1e5SSatish Balay #undef __FUNC__
10865615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1087ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
108858667388SSatish Balay {
108958667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
109058667388SSatish Balay   int         ierr;
109158667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
109258667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
109358667388SSatish Balay   return 0;
109458667388SSatish Balay }
10950ac07820SSatish Balay 
10965615d1e5SSatish Balay #undef __FUNC__
10975615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1098ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10990ac07820SSatish Balay {
11004e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
11014e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
11027d57db60SLois Curfman McInnes   int         ierr;
11037d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
11040ac07820SSatish Balay 
11054e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
11064e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
11074e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
11084e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
11094e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
11104e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
11114e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
11124e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
11134e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
11140ac07820SSatish Balay   if (flag == MAT_LOCAL) {
11154e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
11164e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
11174e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
11184e220ebcSLois Curfman McInnes     info->memory       = isend[3];
11194e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
11200ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1121dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
11224e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11234e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11244e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11254e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11264e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11270ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1128dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
11294e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
11304e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
11314e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
11324e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
11334e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
11340ac07820SSatish Balay   }
11354e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
11364e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
11374e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
11380ac07820SSatish Balay   return 0;
11390ac07820SSatish Balay }
11400ac07820SSatish Balay 
11415615d1e5SSatish Balay #undef __FUNC__
11425615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1143ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
114458667388SSatish Balay {
114558667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
114658667388SSatish Balay 
114758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
114858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
11496da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1150c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
115196854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1152c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1153b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1154b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1155b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1156aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
115758667388SSatish Balay         MatSetOption(a->A,op);
115858667388SSatish Balay         MatSetOption(a->B,op);
1159b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
11606da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
116158667388SSatish Balay              op == MAT_SYMMETRIC ||
116258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
116358667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
116458667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
116558667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
116658667388SSatish Balay     a->roworiented = 0;
116758667388SSatish Balay     MatSetOption(a->A,op);
116858667388SSatish Balay     MatSetOption(a->B,op);
11692b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
117090f02eecSBarry Smith     a->donotstash = 1;
117190f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1172e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
117358667388SSatish Balay   else
1174e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
117558667388SSatish Balay   return 0;
117658667388SSatish Balay }
117758667388SSatish Balay 
11785615d1e5SSatish Balay #undef __FUNC__
11795615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1180ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
11810ac07820SSatish Balay {
11820ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11830ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11840ac07820SSatish Balay   Mat        B;
11850ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11860ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11870ac07820SSatish Balay   Scalar     *a;
11880ac07820SSatish Balay 
11890ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1190e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11910ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11920ac07820SSatish Balay   CHKERRQ(ierr);
11930ac07820SSatish Balay 
11940ac07820SSatish Balay   /* copy over the A part */
11950ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11960ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11970ac07820SSatish Balay   row = baij->rstart;
11980ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11990ac07820SSatish Balay 
12000ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12010ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12020ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12030ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12040ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
12050ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12060ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12070ac07820SSatish Balay         col++; a += bs;
12080ac07820SSatish Balay       }
12090ac07820SSatish Balay     }
12100ac07820SSatish Balay   }
12110ac07820SSatish Balay   /* copy over the B part */
12120ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
12130ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
12140ac07820SSatish Balay   row = baij->rstart*bs;
12150ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
12160ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
12170ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
12180ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
12190ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
12200ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
12210ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
12220ac07820SSatish Balay         col++; a += bs;
12230ac07820SSatish Balay       }
12240ac07820SSatish Balay     }
12250ac07820SSatish Balay   }
12260ac07820SSatish Balay   PetscFree(rvals);
12270ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12280ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12290ac07820SSatish Balay 
12300ac07820SSatish Balay   if (matout != PETSC_NULL) {
12310ac07820SSatish Balay     *matout = B;
12320ac07820SSatish Balay   } else {
12330ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
12340ac07820SSatish Balay     PetscFree(baij->rowners);
12350ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
12360ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
12370ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
12380ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
12390ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
12400ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
12410ac07820SSatish Balay     PetscFree(baij);
1242*f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
12430ac07820SSatish Balay     PetscHeaderDestroy(B);
12440ac07820SSatish Balay   }
12450ac07820SSatish Balay   return 0;
12460ac07820SSatish Balay }
12470e95ebc0SSatish Balay 
12485615d1e5SSatish Balay #undef __FUNC__
12495615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
12500e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
12510e95ebc0SSatish Balay {
12520e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
12530e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
12540e95ebc0SSatish Balay   int ierr,s1,s2,s3;
12550e95ebc0SSatish Balay 
12560e95ebc0SSatish Balay   if (ll)  {
12570e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
12580e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1259e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
12600e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
12610e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
12620e95ebc0SSatish Balay   }
1263e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
12640e95ebc0SSatish Balay   return 0;
12650e95ebc0SSatish Balay }
12660e95ebc0SSatish Balay 
12670ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
12680ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
12690ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
12700ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
12710ac07820SSatish Balay    routine.
12720ac07820SSatish Balay */
12735615d1e5SSatish Balay #undef __FUNC__
12745615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1275ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
12760ac07820SSatish Balay {
12770ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
12780ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
12790ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
12800ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
12810ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12820ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12830ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12840ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12850ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12860ac07820SSatish Balay   IS             istmp;
12870ac07820SSatish Balay 
12880ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12890ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12900ac07820SSatish Balay 
12910ac07820SSatish Balay   /*  first count number of contributors to each processor */
12920ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12930ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12940ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12950ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12960ac07820SSatish Balay     idx = rows[i];
12970ac07820SSatish Balay     found = 0;
12980ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12990ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
13000ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
13010ac07820SSatish Balay       }
13020ac07820SSatish Balay     }
1303e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
13040ac07820SSatish Balay   }
13050ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
13060ac07820SSatish Balay 
13070ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
13080ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
13090ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
13100ac07820SSatish Balay   nrecvs = work[rank];
13110ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
13120ac07820SSatish Balay   nmax = work[rank];
13130ac07820SSatish Balay   PetscFree(work);
13140ac07820SSatish Balay 
13150ac07820SSatish Balay   /* post receives:   */
13160ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
13170ac07820SSatish Balay   CHKPTRQ(rvalues);
13180ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
13190ac07820SSatish Balay   CHKPTRQ(recv_waits);
13200ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13210ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
13220ac07820SSatish Balay   }
13230ac07820SSatish Balay 
13240ac07820SSatish Balay   /* do sends:
13250ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
13260ac07820SSatish Balay          the ith processor
13270ac07820SSatish Balay   */
13280ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
13290ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
13300ac07820SSatish Balay   CHKPTRQ(send_waits);
13310ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
13320ac07820SSatish Balay   starts[0] = 0;
13330ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13340ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
13350ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
13360ac07820SSatish Balay   }
13370ac07820SSatish Balay   ISRestoreIndices(is,&rows);
13380ac07820SSatish Balay 
13390ac07820SSatish Balay   starts[0] = 0;
13400ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
13410ac07820SSatish Balay   count = 0;
13420ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
13430ac07820SSatish Balay     if (procs[i]) {
13440ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
13450ac07820SSatish Balay     }
13460ac07820SSatish Balay   }
13470ac07820SSatish Balay   PetscFree(starts);
13480ac07820SSatish Balay 
13490ac07820SSatish Balay   base = owners[rank]*bs;
13500ac07820SSatish Balay 
13510ac07820SSatish Balay   /*  wait on receives */
13520ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
13530ac07820SSatish Balay   source = lens + nrecvs;
13540ac07820SSatish Balay   count  = nrecvs; slen = 0;
13550ac07820SSatish Balay   while (count) {
13560ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
13570ac07820SSatish Balay     /* unpack receives into our local space */
13580ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
13590ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
13600ac07820SSatish Balay     lens[imdex]  = n;
13610ac07820SSatish Balay     slen += n;
13620ac07820SSatish Balay     count--;
13630ac07820SSatish Balay   }
13640ac07820SSatish Balay   PetscFree(recv_waits);
13650ac07820SSatish Balay 
13660ac07820SSatish Balay   /* move the data into the send scatter */
13670ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
13680ac07820SSatish Balay   count = 0;
13690ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
13700ac07820SSatish Balay     values = rvalues + i*nmax;
13710ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
13720ac07820SSatish Balay       lrows[count++] = values[j] - base;
13730ac07820SSatish Balay     }
13740ac07820SSatish Balay   }
13750ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
13760ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
13770ac07820SSatish Balay 
13780ac07820SSatish Balay   /* actually zap the local rows */
1379029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
13800ac07820SSatish Balay   PLogObjectParent(A,istmp);
13810ac07820SSatish Balay   PetscFree(lrows);
13820ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13830ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13840ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13850ac07820SSatish Balay 
13860ac07820SSatish Balay   /* wait on sends */
13870ac07820SSatish Balay   if (nsends) {
13880ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13890ac07820SSatish Balay     CHKPTRQ(send_status);
13900ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13910ac07820SSatish Balay     PetscFree(send_status);
13920ac07820SSatish Balay   }
13930ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13940ac07820SSatish Balay 
13950ac07820SSatish Balay   return 0;
13960ac07820SSatish Balay }
1397ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13985615d1e5SSatish Balay #undef __FUNC__
13995615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1400ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1401ba4ca20aSSatish Balay {
1402ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1403ba4ca20aSSatish Balay 
1404ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1405ba4ca20aSSatish Balay   else return 0;
1406ba4ca20aSSatish Balay }
14070ac07820SSatish Balay 
14085615d1e5SSatish Balay #undef __FUNC__
14095615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1410ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1411bb5a7306SBarry Smith {
1412bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1413bb5a7306SBarry Smith   int         ierr;
1414bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1415bb5a7306SBarry Smith   return 0;
1416bb5a7306SBarry Smith }
1417bb5a7306SBarry Smith 
14180ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
14190ac07820SSatish Balay 
142079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
142179bdfe76SSatish Balay static struct _MatOps MatOps = {
1422bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
14234c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
14244c50302cSBarry Smith   0,0,0,0,
14250ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
14260e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
142758667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
14284c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
14294c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
14304c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
143194a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1432d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1433ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1434bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1435ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
143679bdfe76SSatish Balay 
143779bdfe76SSatish Balay 
14385615d1e5SSatish Balay #undef __FUNC__
14395615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
144079bdfe76SSatish Balay /*@C
144179bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
144279bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
144379bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
144479bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
144579bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
144679bdfe76SSatish Balay 
144779bdfe76SSatish Balay    Input Parameters:
144879bdfe76SSatish Balay .  comm - MPI communicator
144979bdfe76SSatish Balay .  bs   - size of blockk
145079bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
145192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
145392e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
145492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
145592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
145679bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
145792e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
145879bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
145979bdfe76SSatish Balay            submatrix  (same for all local rows)
146092e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
146192e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
146292e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
146392e8d321SLois Curfman McInnes            it is zero.
146492e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
146579bdfe76SSatish Balay            submatrix (same for all local rows).
146692e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
146792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
146892e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
146979bdfe76SSatish Balay 
147079bdfe76SSatish Balay    Output Parameter:
147179bdfe76SSatish Balay .  A - the matrix
147279bdfe76SSatish Balay 
147379bdfe76SSatish Balay    Notes:
147479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
147579bdfe76SSatish Balay    (possibly both).
147679bdfe76SSatish Balay 
147779bdfe76SSatish Balay    Storage Information:
147879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
147979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
148079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
148179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
148279bdfe76SSatish Balay 
148379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
148479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
148579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
148679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
148779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
148879bdfe76SSatish Balay 
148979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
149079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
149179bdfe76SSatish Balay 
149279bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
149379bdfe76SSatish Balay $         -------------------
149479bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
149579bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
149679bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
149779bdfe76SSatish Balay $         -------------------
149879bdfe76SSatish Balay $
149979bdfe76SSatish Balay 
150079bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
150179bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
150279bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
150357b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
150479bdfe76SSatish Balay 
150579bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
150679bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
150779bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
150892e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
150992e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
15106da5968aSLois Curfman McInnes    matrices.
151179bdfe76SSatish Balay 
151292e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
151379bdfe76SSatish Balay 
151479bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
151579bdfe76SSatish Balay @*/
151679bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
151779bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
151879bdfe76SSatish Balay {
151979bdfe76SSatish Balay   Mat          B;
152079bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
15214c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
152279bdfe76SSatish Balay 
1523e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
152479bdfe76SSatish Balay   *A = 0;
1525*f09e8eb9SSatish Balay   PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
152679bdfe76SSatish Balay   PLogObjectCreate(B);
152779bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
152879bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
152979bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
15304c50302cSBarry Smith   MPI_Comm_size(comm,&size);
15314c50302cSBarry Smith   if (size == 1) {
15324c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
15334c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
15344c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
15354c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
15364c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
15374c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
15384c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
15394c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
15404c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
15414c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
15424c50302cSBarry Smith   }
15434c50302cSBarry Smith 
154479bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
154579bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
154690f02eecSBarry Smith   B->mapping    = 0;
154779bdfe76SSatish Balay   B->factor     = 0;
154879bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
154979bdfe76SSatish Balay 
1550e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
155179bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
155279bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
155379bdfe76SSatish Balay 
155479bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1555e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1556e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1557e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1558cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1559cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
156079bdfe76SSatish Balay 
156179bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
156279bdfe76SSatish Balay     work[0] = m; work[1] = n;
156379bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
156479bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
156579bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
156679bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
156779bdfe76SSatish Balay   }
156879bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
156979bdfe76SSatish Balay     Mbs = M/bs;
1570e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
157179bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
157279bdfe76SSatish Balay     m   = mbs*bs;
157379bdfe76SSatish Balay   }
157479bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
157579bdfe76SSatish Balay     Nbs = N/bs;
1576e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
157779bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
157879bdfe76SSatish Balay     n   = nbs*bs;
157979bdfe76SSatish Balay   }
1580e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
158179bdfe76SSatish Balay 
158279bdfe76SSatish Balay   b->m = m; B->m = m;
158379bdfe76SSatish Balay   b->n = n; B->n = n;
158479bdfe76SSatish Balay   b->N = N; B->N = N;
158579bdfe76SSatish Balay   b->M = M; B->M = M;
158679bdfe76SSatish Balay   b->bs  = bs;
158779bdfe76SSatish Balay   b->bs2 = bs*bs;
158879bdfe76SSatish Balay   b->mbs = mbs;
158979bdfe76SSatish Balay   b->nbs = nbs;
159079bdfe76SSatish Balay   b->Mbs = Mbs;
159179bdfe76SSatish Balay   b->Nbs = Nbs;
159279bdfe76SSatish Balay 
159379bdfe76SSatish Balay   /* build local table of row and column ownerships */
159479bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1595*f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
15960ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
159779bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
159879bdfe76SSatish Balay   b->rowners[0] = 0;
159979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
160079bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
160179bdfe76SSatish Balay   }
160279bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
160379bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
16044fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
16054fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
16064fa0d573SSatish Balay 
160779bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
160879bdfe76SSatish Balay   b->cowners[0] = 0;
160979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
161079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
161179bdfe76SSatish Balay   }
161279bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
161379bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
16144fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
16154fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
161679bdfe76SSatish Balay 
161779bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1618029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
161979bdfe76SSatish Balay   PLogObjectParent(B,b->A);
162079bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1621029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
162279bdfe76SSatish Balay   PLogObjectParent(B,b->B);
162379bdfe76SSatish Balay 
162479bdfe76SSatish Balay   /* build cache for off array entries formed */
162579bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
162690f02eecSBarry Smith   b->donotstash  = 0;
162779bdfe76SSatish Balay   b->colmap      = 0;
162879bdfe76SSatish Balay   b->garray      = 0;
162979bdfe76SSatish Balay   b->roworiented = 1;
163079bdfe76SSatish Balay 
163130793edcSSatish Balay   /* stuff used in block assembly */
163230793edcSSatish Balay   b->barray      = 0;
163330793edcSSatish Balay 
163479bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
163579bdfe76SSatish Balay   b->lvec        = 0;
163679bdfe76SSatish Balay   b->Mvctx       = 0;
163779bdfe76SSatish Balay 
163879bdfe76SSatish Balay   /* stuff for MatGetRow() */
163979bdfe76SSatish Balay   b->rowindices   = 0;
164079bdfe76SSatish Balay   b->rowvalues    = 0;
164179bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
164279bdfe76SSatish Balay 
164379bdfe76SSatish Balay   *A = B;
164479bdfe76SSatish Balay   return 0;
164579bdfe76SSatish Balay }
1646026e39d0SSatish Balay 
16475615d1e5SSatish Balay #undef __FUNC__
16485615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
16490ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
16500ac07820SSatish Balay {
16510ac07820SSatish Balay   Mat         mat;
16520ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
16530ac07820SSatish Balay   int         ierr, len=0, flg;
16540ac07820SSatish Balay 
16550ac07820SSatish Balay   *newmat       = 0;
1656*f09e8eb9SSatish Balay   PetscHeaderCreate(mat,_p_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
16570ac07820SSatish Balay   PLogObjectCreate(mat);
16580ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
16590ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
16600ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
16610ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
16620ac07820SSatish Balay   mat->factor     = matin->factor;
16630ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
16640ac07820SSatish Balay 
16650ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
16660ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
16670ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
16680ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
16690ac07820SSatish Balay 
16700ac07820SSatish Balay   a->bs  = oldmat->bs;
16710ac07820SSatish Balay   a->bs2 = oldmat->bs2;
16720ac07820SSatish Balay   a->mbs = oldmat->mbs;
16730ac07820SSatish Balay   a->nbs = oldmat->nbs;
16740ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
16750ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
16760ac07820SSatish Balay 
16770ac07820SSatish Balay   a->rstart       = oldmat->rstart;
16780ac07820SSatish Balay   a->rend         = oldmat->rend;
16790ac07820SSatish Balay   a->cstart       = oldmat->cstart;
16800ac07820SSatish Balay   a->cend         = oldmat->cend;
16810ac07820SSatish Balay   a->size         = oldmat->size;
16820ac07820SSatish Balay   a->rank         = oldmat->rank;
1683e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
16840ac07820SSatish Balay   a->rowvalues    = 0;
16850ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
168630793edcSSatish Balay   a->barray       = 0;
16870ac07820SSatish Balay 
16880ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
1689*f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
16900ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16910ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16920ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16930ac07820SSatish Balay   if (oldmat->colmap) {
16940ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16950ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16960ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16970ac07820SSatish Balay   } else a->colmap = 0;
16980ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16990ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
17000ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
17010ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
17020ac07820SSatish Balay   } else a->garray = 0;
17030ac07820SSatish Balay 
17040ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
17050ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
17060ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
17070ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
17080ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
17090ac07820SSatish Balay   PLogObjectParent(mat,a->A);
17100ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
17110ac07820SSatish Balay   PLogObjectParent(mat,a->B);
17120ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
17130ac07820SSatish Balay   if (flg) {
17140ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
17150ac07820SSatish Balay   }
17160ac07820SSatish Balay   *newmat = mat;
17170ac07820SSatish Balay   return 0;
17180ac07820SSatish Balay }
171957b952d6SSatish Balay 
172057b952d6SSatish Balay #include "sys.h"
172157b952d6SSatish Balay 
17225615d1e5SSatish Balay #undef __FUNC__
17235615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
172457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
172557b952d6SSatish Balay {
172657b952d6SSatish Balay   Mat          A;
172757b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
172857b952d6SSatish Balay   Scalar       *vals,*buf;
172957b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
173057b952d6SSatish Balay   MPI_Status   status;
1731cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
173257b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
173357b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
173457b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
173557b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
173657b952d6SSatish Balay 
173757b952d6SSatish Balay 
173857b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
173957b952d6SSatish Balay   bs2  = bs*bs;
174057b952d6SSatish Balay 
174157b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
174257b952d6SSatish Balay   if (!rank) {
174357b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
174457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1745e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
174657b952d6SSatish Balay   }
174757b952d6SSatish Balay 
174857b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
174957b952d6SSatish Balay   M = header[1]; N = header[2];
175057b952d6SSatish Balay 
1751e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
175257b952d6SSatish Balay 
175357b952d6SSatish Balay   /*
175457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
175557b952d6SSatish Balay      divisible by the blocksize
175657b952d6SSatish Balay   */
175757b952d6SSatish Balay   Mbs        = M/bs;
175857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
175957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
176057b952d6SSatish Balay   else                  Mbs++;
176157b952d6SSatish Balay   if (extra_rows &&!rank) {
1762b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
176357b952d6SSatish Balay   }
1764537820f0SBarry Smith 
176557b952d6SSatish Balay   /* determine ownership of all rows */
176657b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
176757b952d6SSatish Balay   m   = mbs * bs;
1768cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1769cee3aa6bSSatish Balay   browners = rowners + size + 1;
177057b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
177157b952d6SSatish Balay   rowners[0] = 0;
1772cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1773cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
177457b952d6SSatish Balay   rstart = rowners[rank];
177557b952d6SSatish Balay   rend   = rowners[rank+1];
177657b952d6SSatish Balay 
177757b952d6SSatish Balay   /* distribute row lengths to all processors */
177857b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
177957b952d6SSatish Balay   if (!rank) {
178057b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
178157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
178257b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
178357b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1784cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1785cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
178657b952d6SSatish Balay     PetscFree(sndcounts);
178757b952d6SSatish Balay   }
178857b952d6SSatish Balay   else {
178957b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
179057b952d6SSatish Balay   }
179157b952d6SSatish Balay 
179257b952d6SSatish Balay   if (!rank) {
179357b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
179457b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
179557b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
179657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
179757b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
179857b952d6SSatish Balay         procsnz[i] += rowlengths[j];
179957b952d6SSatish Balay       }
180057b952d6SSatish Balay     }
180157b952d6SSatish Balay     PetscFree(rowlengths);
180257b952d6SSatish Balay 
180357b952d6SSatish Balay     /* determine max buffer needed and allocate it */
180457b952d6SSatish Balay     maxnz = 0;
180557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
180657b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
180757b952d6SSatish Balay     }
180857b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
180957b952d6SSatish Balay 
181057b952d6SSatish Balay     /* read in my part of the matrix column indices  */
181157b952d6SSatish Balay     nz = procsnz[0];
181257b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
181357b952d6SSatish Balay     mycols = ibuf;
1814cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
181557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1816cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1817cee3aa6bSSatish Balay 
181857b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
181957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
182057b952d6SSatish Balay       nz = procsnz[i];
182157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
182257b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
182357b952d6SSatish Balay     }
182457b952d6SSatish Balay     /* read in the stuff for the last proc */
182557b952d6SSatish Balay     if ( size != 1 ) {
182657b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
182757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
182857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1829cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
183057b952d6SSatish Balay     }
183157b952d6SSatish Balay     PetscFree(cols);
183257b952d6SSatish Balay   }
183357b952d6SSatish Balay   else {
183457b952d6SSatish Balay     /* determine buffer space needed for message */
183557b952d6SSatish Balay     nz = 0;
183657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
183757b952d6SSatish Balay       nz += locrowlens[i];
183857b952d6SSatish Balay     }
183957b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
184057b952d6SSatish Balay     mycols = ibuf;
184157b952d6SSatish Balay     /* receive message of column indices*/
184257b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
184357b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1844e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
184557b952d6SSatish Balay   }
184657b952d6SSatish Balay 
184757b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1848cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1849cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
185057b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1851cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
185257b952d6SSatish Balay   masked1 = mask    + Mbs;
185357b952d6SSatish Balay   masked2 = masked1 + Mbs;
185457b952d6SSatish Balay   rowcount = 0; nzcount = 0;
185557b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
185657b952d6SSatish Balay     dcount  = 0;
185757b952d6SSatish Balay     odcount = 0;
185857b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
185957b952d6SSatish Balay       kmax = locrowlens[rowcount];
186057b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
186157b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
186257b952d6SSatish Balay         if (!mask[tmp]) {
186357b952d6SSatish Balay           mask[tmp] = 1;
186457b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
186557b952d6SSatish Balay           else masked1[dcount++] = tmp;
186657b952d6SSatish Balay         }
186757b952d6SSatish Balay       }
186857b952d6SSatish Balay       rowcount++;
186957b952d6SSatish Balay     }
1870cee3aa6bSSatish Balay 
187157b952d6SSatish Balay     dlens[i]  = dcount;
187257b952d6SSatish Balay     odlens[i] = odcount;
1873cee3aa6bSSatish Balay 
187457b952d6SSatish Balay     /* zero out the mask elements we set */
187557b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
187657b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
187757b952d6SSatish Balay   }
1878cee3aa6bSSatish Balay 
187957b952d6SSatish Balay   /* create our matrix */
1880537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1881537820f0SBarry Smith          CHKERRQ(ierr);
188257b952d6SSatish Balay   A = *newmat;
18836d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
188457b952d6SSatish Balay 
188557b952d6SSatish Balay   if (!rank) {
188657b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
188757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
188857b952d6SSatish Balay     nz = procsnz[0];
188957b952d6SSatish Balay     vals = buf;
1890cee3aa6bSSatish Balay     mycols = ibuf;
1891cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
189257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1893cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1894537820f0SBarry Smith 
189557b952d6SSatish Balay     /* insert into matrix */
189657b952d6SSatish Balay     jj      = rstart*bs;
189757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
189857b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
189957b952d6SSatish Balay       mycols += locrowlens[i];
190057b952d6SSatish Balay       vals   += locrowlens[i];
190157b952d6SSatish Balay       jj++;
190257b952d6SSatish Balay     }
190357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
190457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
190557b952d6SSatish Balay       nz = procsnz[i];
190657b952d6SSatish Balay       vals = buf;
190757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
190857b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
190957b952d6SSatish Balay     }
191057b952d6SSatish Balay     /* the last proc */
191157b952d6SSatish Balay     if ( size != 1 ){
191257b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1913cee3aa6bSSatish Balay       vals = buf;
191457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
191557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1916cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
191757b952d6SSatish Balay     }
191857b952d6SSatish Balay     PetscFree(procsnz);
191957b952d6SSatish Balay   }
192057b952d6SSatish Balay   else {
192157b952d6SSatish Balay     /* receive numeric values */
192257b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
192357b952d6SSatish Balay 
192457b952d6SSatish Balay     /* receive message of values*/
192557b952d6SSatish Balay     vals = buf;
1926cee3aa6bSSatish Balay     mycols = ibuf;
192757b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
192857b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1929e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
193057b952d6SSatish Balay 
193157b952d6SSatish Balay     /* insert into matrix */
193257b952d6SSatish Balay     jj      = rstart*bs;
1933cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
193457b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
193557b952d6SSatish Balay       mycols += locrowlens[i];
193657b952d6SSatish Balay       vals   += locrowlens[i];
193757b952d6SSatish Balay       jj++;
193857b952d6SSatish Balay     }
193957b952d6SSatish Balay   }
194057b952d6SSatish Balay   PetscFree(locrowlens);
194157b952d6SSatish Balay   PetscFree(buf);
194257b952d6SSatish Balay   PetscFree(ibuf);
194357b952d6SSatish Balay   PetscFree(rowners);
194457b952d6SSatish Balay   PetscFree(dlens);
1945cee3aa6bSSatish Balay   PetscFree(mask);
19466d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
19476d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
194857b952d6SSatish Balay   return 0;
194957b952d6SSatish Balay }
195057b952d6SSatish Balay 
195157b952d6SSatish Balay 
1952