xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ec1ea8d872e514c2ee36ef99689e0bb2f68c0548)
179bdfe76SSatish Balay #ifndef lint
2*ec1ea8d8SLois Curfman McInnes static char vcid[] = "$Id: mpibaij.c,v 1.56 1997/03/12 22:35:43 curfman Exp curfman $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
595615d1e5SSatish Balay #undef __FUNC__
605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7180c1aa95SSatish Balay #define CHUNKSIZE  10
7280c1aa95SSatish Balay 
73f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7480c1aa95SSatish Balay { \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay     brow = row/bs;  \
7780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
7880c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
7980c1aa95SSatish Balay       bcol = col/bs; \
8080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
81ab26458aSBarry Smith       low = 0; high = nrow; \
82ab26458aSBarry Smith       while (high-low > 3) { \
83ab26458aSBarry Smith         t = (low+high)/2; \
84ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
85ab26458aSBarry Smith         else              low  = t; \
86ab26458aSBarry Smith       } \
87ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8880c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8980c1aa95SSatish Balay         if (rp[_i] == bcol) { \
9080c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
91eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
92eada6651SSatish Balay           else                    *bap  = value;  \
9380c1aa95SSatish Balay           goto _noinsert; \
9480c1aa95SSatish Balay         } \
9580c1aa95SSatish Balay       } \
960520107fSSatish Balay       if (a->nonew) goto _noinsert; \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10280c1aa95SSatish Balay         /* malloc new storage space */ \
10380c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10480c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10580c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10680c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10780c1aa95SSatish Balay  \
10880c1aa95SSatish Balay         /* copy over old data into new slots */ \
10980c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11080c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11180c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11280c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11380c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11480c1aa95SSatish Balay                                                            len*sizeof(int)); \
11580c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11880c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11980c1aa95SSatish Balay         /* free up old matrix storage */ \
12080c1aa95SSatish Balay         PetscFree(a->a);  \
12180c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12280c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12380c1aa95SSatish Balay         a->singlemalloc = 1; \
12480c1aa95SSatish Balay  \
12580c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
12680c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
12780c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12880c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12980c1aa95SSatish Balay         a->reallocs++; \
13080c1aa95SSatish Balay         a->nz++; \
13180c1aa95SSatish Balay       } \
13280c1aa95SSatish Balay       N = nrow++ - 1;  \
13380c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13480c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13580c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13680c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13780c1aa95SSatish Balay       } \
13880c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13980c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14080c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
14180c1aa95SSatish Balay       _noinsert:; \
14280c1aa95SSatish Balay     ailen[brow] = nrow; \
14380c1aa95SSatish Balay }
14457b952d6SSatish Balay 
145639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1465615d1e5SSatish Balay #undef __FUNC__
1475615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
148*ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14957b952d6SSatish Balay {
15057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
15157b952d6SSatish Balay   Scalar      value;
1524fa0d573SSatish Balay   int         ierr,i,j,row,col;
1534fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1544fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1554fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
15657b952d6SSatish Balay 
157eada6651SSatish Balay   /* Some Variables required in the macro */
15880c1aa95SSatish Balay   Mat         A = baij->A;
15980c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
16080c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
16180c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
16280c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
163ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
16480c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
16580c1aa95SSatish Balay 
16657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
167639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
168e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
169e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
170639f9d9dSBarry Smith #endif
17157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
17257b952d6SSatish Balay       row = im[i] - rstart_orig;
17357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
17457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
17557b952d6SSatish Balay           col = in[j] - cstart_orig;
17657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
177f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
17880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
17957b952d6SSatish Balay         }
180639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
181e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
182e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
183639f9d9dSBarry Smith #endif
18457b952d6SSatish Balay         else {
18557b952d6SSatish Balay           if (mat->was_assembled) {
186905e6a2fSBarry Smith             if (!baij->colmap) {
187905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
188905e6a2fSBarry Smith             }
189905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
19057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
19157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
19257b952d6SSatish Balay               col =  in[j];
19357b952d6SSatish Balay             }
19457b952d6SSatish Balay           }
19557b952d6SSatish Balay           else col = in[j];
19657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
197639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
19857b952d6SSatish Balay         }
19957b952d6SSatish Balay       }
20057b952d6SSatish Balay     }
20157b952d6SSatish Balay     else {
20290f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20357b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
20457b952d6SSatish Balay       }
20557b952d6SSatish Balay       else {
20690f02eecSBarry Smith         if (!baij->donotstash) {
20757b952d6SSatish Balay           row = im[i];
20857b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
20957b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
21057b952d6SSatish Balay           }
21157b952d6SSatish Balay         }
21257b952d6SSatish Balay       }
21357b952d6SSatish Balay     }
21490f02eecSBarry Smith   }
21557b952d6SSatish Balay   return 0;
21657b952d6SSatish Balay }
21757b952d6SSatish Balay 
218ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
219ab26458aSBarry Smith #undef __FUNC__
220ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
221*ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
222ab26458aSBarry Smith {
223ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
224ab26458aSBarry Smith   Scalar      *value,*tmp;
225ab26458aSBarry Smith   int         ierr,i,j,ii,jj,row,col;
226ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
227ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
228ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
229ab26458aSBarry Smith 
230ab26458aSBarry Smith   /* Should be stashed somewhere to avoid multiple mallocs */
231ab26458aSBarry Smith   tmp = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(tmp);
232ab26458aSBarry Smith   if (roworiented) {
233ab26458aSBarry Smith     stepval = (n-1)*bs;
234ab26458aSBarry Smith   } else {
235ab26458aSBarry Smith     stepval = (m-1)*bs;
236ab26458aSBarry Smith   }
237ab26458aSBarry Smith   value = (Scalar *)PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(value);
238ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
239ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
240ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
241ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
242ab26458aSBarry Smith #endif
243ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
244ab26458aSBarry Smith       row = im[i] - rstart;
245ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
246ab26458aSBarry Smith         if (in[j] >= cstart && in[j] < cend){
247ab26458aSBarry Smith           col = in[j] - cstart;
248ab26458aSBarry Smith           if (roworiented) {
249ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
250ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
251ab26458aSBarry Smith               for (jj=ii; jj<bs2; jj+=bs )
252ab26458aSBarry Smith                 tmp[jj] = *value++;
253ab26458aSBarry Smith           } else {
254ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
255ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
256ab26458aSBarry Smith               for (jj=0; jj<bs; jj++ )
257ab26458aSBarry Smith                 *tmp++  = *value++;
258ab26458aSBarry Smith             tmp -=bs2;
259ab26458aSBarry Smith           }
260ab26458aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,tmp,addv);CHKERRQ(ierr);
261ab26458aSBarry Smith         }
262ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
263ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
265ab26458aSBarry Smith #endif
266ab26458aSBarry Smith         else {
267ab26458aSBarry Smith           if (mat->was_assembled) {
268ab26458aSBarry Smith             if (!baij->colmap) {
269ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270ab26458aSBarry Smith             }
271ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
272ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
273ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
274ab26458aSBarry Smith               col =  in[j];
275ab26458aSBarry Smith             }
276ab26458aSBarry Smith           }
277ab26458aSBarry Smith           else col = in[j];
278ab26458aSBarry Smith           if (roworiented) {
279ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
280ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
281ab26458aSBarry Smith               for (jj=ii; jj<bs2; jj+=bs )
282ab26458aSBarry Smith                 tmp[jj] = *value++;
283ab26458aSBarry Smith           } else {
284ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
285ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
286ab26458aSBarry Smith               for (jj=0; jj<bs; jj++ )
287ab26458aSBarry Smith                 *tmp++  = *value++;
288ab26458aSBarry Smith             tmp -=bs2;
289ab26458aSBarry Smith           }
290ab26458aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,tmp,addv);CHKERRQ(ierr);
291ab26458aSBarry Smith         }
292ab26458aSBarry Smith       }
293ab26458aSBarry Smith     }
294ab26458aSBarry Smith     else {
295ab26458aSBarry Smith       if (!baij->donotstash) {
296ab26458aSBarry Smith         row = im[i]*bs;
297ab26458aSBarry Smith         if (roworiented ) {
298ab26458aSBarry Smith           for ( ii=0; ii<bs; i++,row++ ) {
299ab26458aSBarry Smith             for ( jj=0; jj<bs; jj++ ) {
300ab26458aSBarry Smith               ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
301ab26458aSBarry Smith             }
302ab26458aSBarry Smith           }
303ab26458aSBarry Smith         }
304ab26458aSBarry Smith         else {
305ab26458aSBarry Smith           row = im[i];
306ab26458aSBarry Smith 	  for ( j=0; j<n; j++ ) {
307ab26458aSBarry Smith 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
308ab26458aSBarry Smith           }
309ab26458aSBarry Smith         }
310ab26458aSBarry Smith       }
311ab26458aSBarry Smith     }
312ab26458aSBarry Smith   }
313ab26458aSBarry Smith   PetscFree(tmp);
314ab26458aSBarry Smith   return 0;
315ab26458aSBarry Smith }
316ab26458aSBarry Smith 
3175615d1e5SSatish Balay #undef __FUNC__
3185615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
319*ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
320d6de1c52SSatish Balay {
321d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
322d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
323d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
324d6de1c52SSatish Balay 
325d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
326e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
327e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
328d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
329d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
330d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
331e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
332e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
333d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
334d6de1c52SSatish Balay           col = idxn[j] - bscstart;
335d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
336d6de1c52SSatish Balay         }
337d6de1c52SSatish Balay         else {
338905e6a2fSBarry Smith           if (!baij->colmap) {
339905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340905e6a2fSBarry Smith           }
341e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
342dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
343d9d09a02SSatish Balay           else {
344dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
345d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
346d6de1c52SSatish Balay           }
347d6de1c52SSatish Balay         }
348d6de1c52SSatish Balay       }
349d9d09a02SSatish Balay     }
350d6de1c52SSatish Balay     else {
351e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
352d6de1c52SSatish Balay     }
353d6de1c52SSatish Balay   }
354d6de1c52SSatish Balay   return 0;
355d6de1c52SSatish Balay }
356d6de1c52SSatish Balay 
3575615d1e5SSatish Balay #undef __FUNC__
3585615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
359*ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
360d6de1c52SSatish Balay {
361d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
362d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
363acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
364d6de1c52SSatish Balay   double     sum = 0.0;
365d6de1c52SSatish Balay   Scalar     *v;
366d6de1c52SSatish Balay 
367d6de1c52SSatish Balay   if (baij->size == 1) {
368d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
369d6de1c52SSatish Balay   } else {
370d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
371d6de1c52SSatish Balay       v = amat->a;
372d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
373d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
374d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
375d6de1c52SSatish Balay #else
376d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
377d6de1c52SSatish Balay #endif
378d6de1c52SSatish Balay       }
379d6de1c52SSatish Balay       v = bmat->a;
380d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
381d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
382d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
383d6de1c52SSatish Balay #else
384d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
385d6de1c52SSatish Balay #endif
386d6de1c52SSatish Balay       }
387d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
388d6de1c52SSatish Balay       *norm = sqrt(*norm);
389d6de1c52SSatish Balay     }
390acdf5bf4SSatish Balay     else
391e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
392d6de1c52SSatish Balay   }
393d6de1c52SSatish Balay   return 0;
394d6de1c52SSatish Balay }
39557b952d6SSatish Balay 
3965615d1e5SSatish Balay #undef __FUNC__
3975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
398*ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
39957b952d6SSatish Balay {
40057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
40157b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
40257b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
40357b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
40457b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
40557b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
40657b952d6SSatish Balay   InsertMode  addv;
40757b952d6SSatish Balay   Scalar      *rvalues,*svalues;
40857b952d6SSatish Balay 
40957b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
410e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
41157b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
412e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
41357b952d6SSatish Balay   }
414e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
41557b952d6SSatish Balay 
41657b952d6SSatish Balay   /*  first count number of contributors to each processor */
41757b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
41857b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
41957b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
42057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
42157b952d6SSatish Balay     idx = baij->stash.idx[i];
42257b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
42357b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
42457b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
42557b952d6SSatish Balay       }
42657b952d6SSatish Balay     }
42757b952d6SSatish Balay   }
42857b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
42957b952d6SSatish Balay 
43057b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
43157b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
43257b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
43357b952d6SSatish Balay   nreceives = work[rank];
43457b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
43557b952d6SSatish Balay   nmax = work[rank];
43657b952d6SSatish Balay   PetscFree(work);
43757b952d6SSatish Balay 
43857b952d6SSatish Balay   /* post receives:
43957b952d6SSatish Balay        1) each message will consist of ordered pairs
44057b952d6SSatish Balay      (global index,value) we store the global index as a double
44157b952d6SSatish Balay      to simplify the message passing.
44257b952d6SSatish Balay        2) since we don't know how long each individual message is we
44357b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
44457b952d6SSatish Balay      this is a lot of wasted space.
44557b952d6SSatish Balay 
44657b952d6SSatish Balay 
44757b952d6SSatish Balay        This could be done better.
44857b952d6SSatish Balay   */
44957b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
45057b952d6SSatish Balay   CHKPTRQ(rvalues);
45157b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
45257b952d6SSatish Balay   CHKPTRQ(recv_waits);
45357b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
45457b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
45557b952d6SSatish Balay               comm,recv_waits+i);
45657b952d6SSatish Balay   }
45757b952d6SSatish Balay 
45857b952d6SSatish Balay   /* do sends:
45957b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
46057b952d6SSatish Balay          the ith processor
46157b952d6SSatish Balay   */
46257b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
46357b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
46457b952d6SSatish Balay   CHKPTRQ(send_waits);
46557b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
46657b952d6SSatish Balay   starts[0] = 0;
46757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
46857b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
46957b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
47057b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
47157b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
47257b952d6SSatish Balay   }
47357b952d6SSatish Balay   PetscFree(owner);
47457b952d6SSatish Balay   starts[0] = 0;
47557b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
47657b952d6SSatish Balay   count = 0;
47757b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
47857b952d6SSatish Balay     if (procs[i]) {
47957b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
48057b952d6SSatish Balay                 comm,send_waits+count++);
48157b952d6SSatish Balay     }
48257b952d6SSatish Balay   }
48357b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
48457b952d6SSatish Balay 
48557b952d6SSatish Balay   /* Free cache space */
486d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
48757b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
48857b952d6SSatish Balay 
48957b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
49057b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
49157b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
49257b952d6SSatish Balay   baij->rmax       = nmax;
49357b952d6SSatish Balay 
49457b952d6SSatish Balay   return 0;
49557b952d6SSatish Balay }
49657b952d6SSatish Balay 
49757b952d6SSatish Balay 
4985615d1e5SSatish Balay #undef __FUNC__
4995615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
500*ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
50157b952d6SSatish Balay {
50257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
50357b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
50457b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
50557b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
50657b952d6SSatish Balay   Scalar      *values,val;
507e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
50857b952d6SSatish Balay 
50957b952d6SSatish Balay   /*  wait on receives */
51057b952d6SSatish Balay   while (count) {
51157b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
51257b952d6SSatish Balay     /* unpack receives into our local space */
51357b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
51457b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
51557b952d6SSatish Balay     n = n/3;
51657b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
51757b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
51857b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
51957b952d6SSatish Balay       val = values[3*i+2];
52057b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
52157b952d6SSatish Balay         col -= baij->cstart*bs;
52257b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
52357b952d6SSatish Balay       }
52457b952d6SSatish Balay       else {
52557b952d6SSatish Balay         if (mat->was_assembled) {
526905e6a2fSBarry Smith           if (!baij->colmap) {
527905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
528905e6a2fSBarry Smith           }
529905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
53057b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
53157b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
53257b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
53357b952d6SSatish Balay           }
53457b952d6SSatish Balay         }
53557b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
53657b952d6SSatish Balay       }
53757b952d6SSatish Balay     }
53857b952d6SSatish Balay     count--;
53957b952d6SSatish Balay   }
54057b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
54157b952d6SSatish Balay 
54257b952d6SSatish Balay   /* wait on sends */
54357b952d6SSatish Balay   if (baij->nsends) {
54457b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
54557b952d6SSatish Balay     CHKPTRQ(send_status);
54657b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
54757b952d6SSatish Balay     PetscFree(send_status);
54857b952d6SSatish Balay   }
54957b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
55057b952d6SSatish Balay 
55157b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
55257b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
55357b952d6SSatish Balay 
55457b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
55557b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
55657b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
55757b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
55857b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
55957b952d6SSatish Balay   }
56057b952d6SSatish Balay 
5616d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
56257b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
56357b952d6SSatish Balay   }
56457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
56557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
56657b952d6SSatish Balay 
56757b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
56857b952d6SSatish Balay   return 0;
56957b952d6SSatish Balay }
57057b952d6SSatish Balay 
5715615d1e5SSatish Balay #undef __FUNC__
5725615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
57357b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
57457b952d6SSatish Balay {
57557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
57657b952d6SSatish Balay   int          ierr;
57757b952d6SSatish Balay 
57857b952d6SSatish Balay   if (baij->size == 1) {
57957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
58057b952d6SSatish Balay   }
581e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
58257b952d6SSatish Balay   return 0;
58357b952d6SSatish Balay }
58457b952d6SSatish Balay 
5855615d1e5SSatish Balay #undef __FUNC__
5865615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
58757b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
58857b952d6SSatish Balay {
58957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
590cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
59157b952d6SSatish Balay   FILE         *fd;
59257b952d6SSatish Balay   ViewerType   vtype;
59357b952d6SSatish Balay 
59457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
59557b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
59657b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
597639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5984e220ebcSLois Curfman McInnes       MatInfo info;
59957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
60057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6014e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
60257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
60357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6044e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6054e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6064e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6074e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6084e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6094e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
61057b952d6SSatish Balay       fflush(fd);
61157b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
61257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
61357b952d6SSatish Balay       return 0;
61457b952d6SSatish Balay     }
615639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
616bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
61757b952d6SSatish Balay       return 0;
61857b952d6SSatish Balay     }
61957b952d6SSatish Balay   }
62057b952d6SSatish Balay 
62157b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
62257b952d6SSatish Balay     Draw       draw;
62357b952d6SSatish Balay     PetscTruth isnull;
62457b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
62557b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
62657b952d6SSatish Balay   }
62757b952d6SSatish Balay 
62857b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
62957b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
63057b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
63157b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
63257b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
63357b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
63457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
63557b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
63657b952d6SSatish Balay     fflush(fd);
63757b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
63857b952d6SSatish Balay   }
63957b952d6SSatish Balay   else {
64057b952d6SSatish Balay     int size = baij->size;
64157b952d6SSatish Balay     rank = baij->rank;
64257b952d6SSatish Balay     if (size == 1) {
64357b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
64457b952d6SSatish Balay     }
64557b952d6SSatish Balay     else {
64657b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
64757b952d6SSatish Balay       Mat         A;
64857b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
64957b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
65057b952d6SSatish Balay       int         mbs=baij->mbs;
65157b952d6SSatish Balay       Scalar      *a;
65257b952d6SSatish Balay 
65357b952d6SSatish Balay       if (!rank) {
654cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
65557b952d6SSatish Balay         CHKERRQ(ierr);
65657b952d6SSatish Balay       }
65757b952d6SSatish Balay       else {
658cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
65957b952d6SSatish Balay         CHKERRQ(ierr);
66057b952d6SSatish Balay       }
66157b952d6SSatish Balay       PLogObjectParent(mat,A);
66257b952d6SSatish Balay 
66357b952d6SSatish Balay       /* copy over the A part */
66457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
66557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66657b952d6SSatish Balay       row = baij->rstart;
66757b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
66857b952d6SSatish Balay 
66957b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
67057b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
67157b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
67257b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
67357b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
67457b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
675cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
676cee3aa6bSSatish Balay             col++; a += bs;
67757b952d6SSatish Balay           }
67857b952d6SSatish Balay         }
67957b952d6SSatish Balay       }
68057b952d6SSatish Balay       /* copy over the B part */
68157b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
68257b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
68357b952d6SSatish Balay       row = baij->rstart*bs;
68457b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
68557b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
68657b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
68757b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
68857b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
68957b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
690cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
691cee3aa6bSSatish Balay             col++; a += bs;
69257b952d6SSatish Balay           }
69357b952d6SSatish Balay         }
69457b952d6SSatish Balay       }
69557b952d6SSatish Balay       PetscFree(rvals);
6966d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6976d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
69857b952d6SSatish Balay       if (!rank) {
69957b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
70057b952d6SSatish Balay       }
70157b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
70257b952d6SSatish Balay     }
70357b952d6SSatish Balay   }
70457b952d6SSatish Balay   return 0;
70557b952d6SSatish Balay }
70657b952d6SSatish Balay 
70757b952d6SSatish Balay 
70857b952d6SSatish Balay 
7095615d1e5SSatish Balay #undef __FUNC__
7105615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
711*ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
71257b952d6SSatish Balay {
71357b952d6SSatish Balay   Mat         mat = (Mat) obj;
71457b952d6SSatish Balay   int         ierr;
71557b952d6SSatish Balay   ViewerType  vtype;
71657b952d6SSatish Balay 
71757b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
71857b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
71957b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
72057b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
72157b952d6SSatish Balay   }
72257b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
72357b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
72457b952d6SSatish Balay   }
72557b952d6SSatish Balay   return 0;
72657b952d6SSatish Balay }
72757b952d6SSatish Balay 
7285615d1e5SSatish Balay #undef __FUNC__
7295615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
730*ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
73179bdfe76SSatish Balay {
73279bdfe76SSatish Balay   Mat         mat = (Mat) obj;
73379bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
73479bdfe76SSatish Balay   int         ierr;
73579bdfe76SSatish Balay 
73679bdfe76SSatish Balay #if defined(PETSC_LOG)
73779bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
73879bdfe76SSatish Balay #endif
73979bdfe76SSatish Balay 
74079bdfe76SSatish Balay   PetscFree(baij->rowners);
74179bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
74279bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
74379bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
74479bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
74579bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
74679bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
74779bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
74879bdfe76SSatish Balay   PetscFree(baij);
74990f02eecSBarry Smith   if (mat->mapping) {
75090f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
75190f02eecSBarry Smith   }
75279bdfe76SSatish Balay   PLogObjectDestroy(mat);
75379bdfe76SSatish Balay   PetscHeaderDestroy(mat);
75479bdfe76SSatish Balay   return 0;
75579bdfe76SSatish Balay }
75679bdfe76SSatish Balay 
7575615d1e5SSatish Balay #undef __FUNC__
7585615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
759*ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
760cee3aa6bSSatish Balay {
761cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
76247b4a8eaSLois Curfman McInnes   int         ierr, nt;
763cee3aa6bSSatish Balay 
764c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
76547b4a8eaSLois Curfman McInnes   if (nt != a->n) {
766ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
76747b4a8eaSLois Curfman McInnes   }
768c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
76947b4a8eaSLois Curfman McInnes   if (nt != a->m) {
770e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
77147b4a8eaSLois Curfman McInnes   }
77243a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
773cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
77443a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
775cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
77643a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
777cee3aa6bSSatish Balay   return 0;
778cee3aa6bSSatish Balay }
779cee3aa6bSSatish Balay 
7805615d1e5SSatish Balay #undef __FUNC__
7815615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
782*ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
783cee3aa6bSSatish Balay {
784cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
785cee3aa6bSSatish Balay   int        ierr;
78643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
787cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
78843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
789cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
790cee3aa6bSSatish Balay   return 0;
791cee3aa6bSSatish Balay }
792cee3aa6bSSatish Balay 
7935615d1e5SSatish Balay #undef __FUNC__
7945615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
795*ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
796cee3aa6bSSatish Balay {
797cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
798cee3aa6bSSatish Balay   int        ierr;
799cee3aa6bSSatish Balay 
800cee3aa6bSSatish Balay   /* do nondiagonal part */
801cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
802cee3aa6bSSatish Balay   /* send it on its way */
803537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
804cee3aa6bSSatish Balay   /* do local part */
805cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
806cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
807cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
808cee3aa6bSSatish Balay   /* but is not perhaps always true. */
809639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
810cee3aa6bSSatish Balay   return 0;
811cee3aa6bSSatish Balay }
812cee3aa6bSSatish Balay 
8135615d1e5SSatish Balay #undef __FUNC__
8145615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
815*ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
816cee3aa6bSSatish Balay {
817cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
818cee3aa6bSSatish Balay   int        ierr;
819cee3aa6bSSatish Balay 
820cee3aa6bSSatish Balay   /* do nondiagonal part */
821cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
822cee3aa6bSSatish Balay   /* send it on its way */
823537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
824cee3aa6bSSatish Balay   /* do local part */
825cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
826cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
827cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
828cee3aa6bSSatish Balay   /* but is not perhaps always true. */
829537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
830cee3aa6bSSatish Balay   return 0;
831cee3aa6bSSatish Balay }
832cee3aa6bSSatish Balay 
833cee3aa6bSSatish Balay /*
834cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
835cee3aa6bSSatish Balay    diagonal block
836cee3aa6bSSatish Balay */
8375615d1e5SSatish Balay #undef __FUNC__
8385615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
839*ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
840cee3aa6bSSatish Balay {
841cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
842cee3aa6bSSatish Balay   if (a->M != a->N)
843e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
844cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
845cee3aa6bSSatish Balay }
846cee3aa6bSSatish Balay 
8475615d1e5SSatish Balay #undef __FUNC__
8485615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
849*ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
850cee3aa6bSSatish Balay {
851cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
852cee3aa6bSSatish Balay   int        ierr;
853cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
854cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
855cee3aa6bSSatish Balay   return 0;
856cee3aa6bSSatish Balay }
857026e39d0SSatish Balay 
8585615d1e5SSatish Balay #undef __FUNC__
8595615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
860*ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
86157b952d6SSatish Balay {
86257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
86357b952d6SSatish Balay   *m = mat->M; *n = mat->N;
86457b952d6SSatish Balay   return 0;
86557b952d6SSatish Balay }
86657b952d6SSatish Balay 
8675615d1e5SSatish Balay #undef __FUNC__
8685615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
869*ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
87057b952d6SSatish Balay {
87157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
87257b952d6SSatish Balay   *m = mat->m; *n = mat->N;
87357b952d6SSatish Balay   return 0;
87457b952d6SSatish Balay }
87557b952d6SSatish Balay 
8765615d1e5SSatish Balay #undef __FUNC__
8775615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
878*ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
87957b952d6SSatish Balay {
88057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
88157b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
88257b952d6SSatish Balay   return 0;
88357b952d6SSatish Balay }
88457b952d6SSatish Balay 
885acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
886acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
887acdf5bf4SSatish Balay 
8885615d1e5SSatish Balay #undef __FUNC__
8895615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
890acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
891acdf5bf4SSatish Balay {
892acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
893acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
894acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
895d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
896d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
897acdf5bf4SSatish Balay 
898e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
899acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
900acdf5bf4SSatish Balay 
901acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
902acdf5bf4SSatish Balay     /*
903acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
904acdf5bf4SSatish Balay     */
905acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
906bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
907bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
908acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
909acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
910acdf5bf4SSatish Balay     }
911acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
912acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
913acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
914acdf5bf4SSatish Balay   }
915acdf5bf4SSatish Balay 
916acdf5bf4SSatish Balay 
917e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
918d9d09a02SSatish Balay   lrow = row - brstart;
919acdf5bf4SSatish Balay 
920acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
921acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
922acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
923acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
924acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
925acdf5bf4SSatish Balay   nztot = nzA + nzB;
926acdf5bf4SSatish Balay 
927acdf5bf4SSatish Balay   cmap  = mat->garray;
928acdf5bf4SSatish Balay   if (v  || idx) {
929acdf5bf4SSatish Balay     if (nztot) {
930acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
931acdf5bf4SSatish Balay       int imark = -1;
932acdf5bf4SSatish Balay       if (v) {
933acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
934acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
935d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
936acdf5bf4SSatish Balay           else break;
937acdf5bf4SSatish Balay         }
938acdf5bf4SSatish Balay         imark = i;
939acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
940acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
941acdf5bf4SSatish Balay       }
942acdf5bf4SSatish Balay       if (idx) {
943acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
944acdf5bf4SSatish Balay         if (imark > -1) {
945acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
946bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
947acdf5bf4SSatish Balay           }
948acdf5bf4SSatish Balay         } else {
949acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
950d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
951d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
952acdf5bf4SSatish Balay             else break;
953acdf5bf4SSatish Balay           }
954acdf5bf4SSatish Balay           imark = i;
955acdf5bf4SSatish Balay         }
956d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
957d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
958acdf5bf4SSatish Balay       }
959acdf5bf4SSatish Balay     }
960d212a18eSSatish Balay     else {
961d212a18eSSatish Balay       if (idx) *idx = 0;
962d212a18eSSatish Balay       if (v)   *v   = 0;
963d212a18eSSatish Balay     }
964acdf5bf4SSatish Balay   }
965acdf5bf4SSatish Balay   *nz = nztot;
966acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
967acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
968acdf5bf4SSatish Balay   return 0;
969acdf5bf4SSatish Balay }
970acdf5bf4SSatish Balay 
9715615d1e5SSatish Balay #undef __FUNC__
9725615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
973acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
974acdf5bf4SSatish Balay {
975acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
976acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
977e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
978acdf5bf4SSatish Balay   }
979acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
980acdf5bf4SSatish Balay   return 0;
981acdf5bf4SSatish Balay }
982acdf5bf4SSatish Balay 
9835615d1e5SSatish Balay #undef __FUNC__
9845615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
985*ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
9865a838052SSatish Balay {
9875a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
9885a838052SSatish Balay   *bs = baij->bs;
9895a838052SSatish Balay   return 0;
9905a838052SSatish Balay }
9915a838052SSatish Balay 
9925615d1e5SSatish Balay #undef __FUNC__
9935615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
994*ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
99558667388SSatish Balay {
99658667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
99758667388SSatish Balay   int         ierr;
99858667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
99958667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
100058667388SSatish Balay   return 0;
100158667388SSatish Balay }
10020ac07820SSatish Balay 
10035615d1e5SSatish Balay #undef __FUNC__
10045615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1005*ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10060ac07820SSatish Balay {
10074e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10084e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10097d57db60SLois Curfman McInnes   int         ierr;
10107d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10110ac07820SSatish Balay 
10124e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10134e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10144e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10154e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10164e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10174e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10184e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
10194e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10204e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
10210ac07820SSatish Balay   if (flag == MAT_LOCAL) {
10224e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10234e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10244e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10254e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10264e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
10270ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1028dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
10294e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10304e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10314e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10324e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10334e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10340ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1035dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
10364e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10374e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10384e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10394e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10404e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10410ac07820SSatish Balay   }
10424e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10434e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10444e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10450ac07820SSatish Balay   return 0;
10460ac07820SSatish Balay }
10470ac07820SSatish Balay 
10485615d1e5SSatish Balay #undef __FUNC__
10495615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1050*ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
105158667388SSatish Balay {
105258667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
105358667388SSatish Balay 
105458667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
105558667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10566da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1057c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1058c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1059b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1060b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1061b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1062aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
106358667388SSatish Balay         MatSetOption(a->A,op);
106458667388SSatish Balay         MatSetOption(a->B,op);
1065b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10666da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
106758667388SSatish Balay              op == MAT_SYMMETRIC ||
106858667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
106958667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
107058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
107158667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
107258667388SSatish Balay     a->roworiented = 0;
107358667388SSatish Balay     MatSetOption(a->A,op);
107458667388SSatish Balay     MatSetOption(a->B,op);
107590f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
107690f02eecSBarry Smith     a->donotstash = 1;
107790f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1078e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
107958667388SSatish Balay   else
1080e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
108158667388SSatish Balay   return 0;
108258667388SSatish Balay }
108358667388SSatish Balay 
10845615d1e5SSatish Balay #undef __FUNC__
10855615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1086*ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
10870ac07820SSatish Balay {
10880ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
10890ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
10900ac07820SSatish Balay   Mat        B;
10910ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
10920ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
10930ac07820SSatish Balay   Scalar     *a;
10940ac07820SSatish Balay 
10950ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1096e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
10970ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
10980ac07820SSatish Balay   CHKERRQ(ierr);
10990ac07820SSatish Balay 
11000ac07820SSatish Balay   /* copy over the A part */
11010ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11020ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11030ac07820SSatish Balay   row = baij->rstart;
11040ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11050ac07820SSatish Balay 
11060ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11070ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11080ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11090ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11100ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11110ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11120ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11130ac07820SSatish Balay         col++; a += bs;
11140ac07820SSatish Balay       }
11150ac07820SSatish Balay     }
11160ac07820SSatish Balay   }
11170ac07820SSatish Balay   /* copy over the B part */
11180ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
11190ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11200ac07820SSatish Balay   row = baij->rstart*bs;
11210ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11220ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11230ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11240ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11250ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
11260ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11270ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11280ac07820SSatish Balay         col++; a += bs;
11290ac07820SSatish Balay       }
11300ac07820SSatish Balay     }
11310ac07820SSatish Balay   }
11320ac07820SSatish Balay   PetscFree(rvals);
11330ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11340ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11350ac07820SSatish Balay 
11360ac07820SSatish Balay   if (matout != PETSC_NULL) {
11370ac07820SSatish Balay     *matout = B;
11380ac07820SSatish Balay   } else {
11390ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
11400ac07820SSatish Balay     PetscFree(baij->rowners);
11410ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
11420ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
11430ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
11440ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
11450ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
11460ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
11470ac07820SSatish Balay     PetscFree(baij);
11480ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
11490ac07820SSatish Balay     PetscHeaderDestroy(B);
11500ac07820SSatish Balay   }
11510ac07820SSatish Balay   return 0;
11520ac07820SSatish Balay }
11530e95ebc0SSatish Balay 
11545615d1e5SSatish Balay #undef __FUNC__
11555615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
11560e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
11570e95ebc0SSatish Balay {
11580e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
11590e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
11600e95ebc0SSatish Balay   int ierr,s1,s2,s3;
11610e95ebc0SSatish Balay 
11620e95ebc0SSatish Balay   if (ll)  {
11630e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
11640e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1165e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
11660e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
11670e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
11680e95ebc0SSatish Balay   }
1169e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
11700e95ebc0SSatish Balay   return 0;
11710e95ebc0SSatish Balay }
11720e95ebc0SSatish Balay 
11730ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
11740ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
11750ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
11760ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
11770ac07820SSatish Balay    routine.
11780ac07820SSatish Balay */
11795615d1e5SSatish Balay #undef __FUNC__
11805615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1181*ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
11820ac07820SSatish Balay {
11830ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
11840ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
11850ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
11860ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
11870ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
11880ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
11890ac07820SSatish Balay   MPI_Comm       comm = A->comm;
11900ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
11910ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
11920ac07820SSatish Balay   IS             istmp;
11930ac07820SSatish Balay 
11940ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
11950ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
11960ac07820SSatish Balay 
11970ac07820SSatish Balay   /*  first count number of contributors to each processor */
11980ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
11990ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12000ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12010ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12020ac07820SSatish Balay     idx = rows[i];
12030ac07820SSatish Balay     found = 0;
12040ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12050ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12060ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12070ac07820SSatish Balay       }
12080ac07820SSatish Balay     }
1209e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12100ac07820SSatish Balay   }
12110ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12120ac07820SSatish Balay 
12130ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12140ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12150ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12160ac07820SSatish Balay   nrecvs = work[rank];
12170ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
12180ac07820SSatish Balay   nmax = work[rank];
12190ac07820SSatish Balay   PetscFree(work);
12200ac07820SSatish Balay 
12210ac07820SSatish Balay   /* post receives:   */
12220ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
12230ac07820SSatish Balay   CHKPTRQ(rvalues);
12240ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
12250ac07820SSatish Balay   CHKPTRQ(recv_waits);
12260ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12270ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
12280ac07820SSatish Balay   }
12290ac07820SSatish Balay 
12300ac07820SSatish Balay   /* do sends:
12310ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
12320ac07820SSatish Balay          the ith processor
12330ac07820SSatish Balay   */
12340ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
12350ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
12360ac07820SSatish Balay   CHKPTRQ(send_waits);
12370ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
12380ac07820SSatish Balay   starts[0] = 0;
12390ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12400ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12410ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
12420ac07820SSatish Balay   }
12430ac07820SSatish Balay   ISRestoreIndices(is,&rows);
12440ac07820SSatish Balay 
12450ac07820SSatish Balay   starts[0] = 0;
12460ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12470ac07820SSatish Balay   count = 0;
12480ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
12490ac07820SSatish Balay     if (procs[i]) {
12500ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
12510ac07820SSatish Balay     }
12520ac07820SSatish Balay   }
12530ac07820SSatish Balay   PetscFree(starts);
12540ac07820SSatish Balay 
12550ac07820SSatish Balay   base = owners[rank]*bs;
12560ac07820SSatish Balay 
12570ac07820SSatish Balay   /*  wait on receives */
12580ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
12590ac07820SSatish Balay   source = lens + nrecvs;
12600ac07820SSatish Balay   count  = nrecvs; slen = 0;
12610ac07820SSatish Balay   while (count) {
12620ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
12630ac07820SSatish Balay     /* unpack receives into our local space */
12640ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
12650ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
12660ac07820SSatish Balay     lens[imdex]  = n;
12670ac07820SSatish Balay     slen += n;
12680ac07820SSatish Balay     count--;
12690ac07820SSatish Balay   }
12700ac07820SSatish Balay   PetscFree(recv_waits);
12710ac07820SSatish Balay 
12720ac07820SSatish Balay   /* move the data into the send scatter */
12730ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
12740ac07820SSatish Balay   count = 0;
12750ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12760ac07820SSatish Balay     values = rvalues + i*nmax;
12770ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
12780ac07820SSatish Balay       lrows[count++] = values[j] - base;
12790ac07820SSatish Balay     }
12800ac07820SSatish Balay   }
12810ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
12820ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
12830ac07820SSatish Balay 
12840ac07820SSatish Balay   /* actually zap the local rows */
1285537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
12860ac07820SSatish Balay   PLogObjectParent(A,istmp);
12870ac07820SSatish Balay   PetscFree(lrows);
12880ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
12890ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
12900ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
12910ac07820SSatish Balay 
12920ac07820SSatish Balay   /* wait on sends */
12930ac07820SSatish Balay   if (nsends) {
12940ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
12950ac07820SSatish Balay     CHKPTRQ(send_status);
12960ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
12970ac07820SSatish Balay     PetscFree(send_status);
12980ac07820SSatish Balay   }
12990ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13000ac07820SSatish Balay 
13010ac07820SSatish Balay   return 0;
13020ac07820SSatish Balay }
1303ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13045615d1e5SSatish Balay #undef __FUNC__
13055615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1306*ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1307ba4ca20aSSatish Balay {
1308ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1309ba4ca20aSSatish Balay 
1310ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1311ba4ca20aSSatish Balay   else return 0;
1312ba4ca20aSSatish Balay }
13130ac07820SSatish Balay 
13145615d1e5SSatish Balay #undef __FUNC__
13155615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1316*ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1317bb5a7306SBarry Smith {
1318bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1319bb5a7306SBarry Smith   int         ierr;
1320bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1321bb5a7306SBarry Smith   return 0;
1322bb5a7306SBarry Smith }
1323bb5a7306SBarry Smith 
13240ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
13250ac07820SSatish Balay 
132679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
132779bdfe76SSatish Balay static struct _MatOps MatOps = {
1328bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
13294c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
13304c50302cSBarry Smith   0,0,0,0,
13310ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
13320e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
133358667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
13344c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
13354c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
13364c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
133794a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1338d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1339ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1340bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1341ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
134279bdfe76SSatish Balay 
134379bdfe76SSatish Balay 
13445615d1e5SSatish Balay #undef __FUNC__
13455615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
134679bdfe76SSatish Balay /*@C
134779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
134879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
134979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
135079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
135179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
135279bdfe76SSatish Balay 
135379bdfe76SSatish Balay    Input Parameters:
135479bdfe76SSatish Balay .  comm - MPI communicator
135579bdfe76SSatish Balay .  bs   - size of blockk
135679bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
135792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
135892e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
135992e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
136092e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
136192e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
136279bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
136392e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
136479bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
136579bdfe76SSatish Balay            submatrix  (same for all local rows)
136692e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
136792e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
136892e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
136992e8d321SLois Curfman McInnes            it is zero.
137092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
137179bdfe76SSatish Balay            submatrix (same for all local rows).
137292e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
137392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
137492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
137579bdfe76SSatish Balay 
137679bdfe76SSatish Balay    Output Parameter:
137779bdfe76SSatish Balay .  A - the matrix
137879bdfe76SSatish Balay 
137979bdfe76SSatish Balay    Notes:
138079bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
138179bdfe76SSatish Balay    (possibly both).
138279bdfe76SSatish Balay 
138379bdfe76SSatish Balay    Storage Information:
138479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
138579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
138679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
138779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
138879bdfe76SSatish Balay 
138979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
139079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
139179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
139279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
139379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
139479bdfe76SSatish Balay 
139579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
139679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
139779bdfe76SSatish Balay 
139879bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
139979bdfe76SSatish Balay $         -------------------
140079bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
140179bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
140279bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
140379bdfe76SSatish Balay $         -------------------
140479bdfe76SSatish Balay $
140579bdfe76SSatish Balay 
140679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
140779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
140879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
140957b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
141079bdfe76SSatish Balay 
141179bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
141279bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
141379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
141492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
141592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14166da5968aSLois Curfman McInnes    matrices.
141779bdfe76SSatish Balay 
141892e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
141979bdfe76SSatish Balay 
142079bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
142179bdfe76SSatish Balay @*/
142279bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
142379bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
142479bdfe76SSatish Balay {
142579bdfe76SSatish Balay   Mat          B;
142679bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
14274c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
142879bdfe76SSatish Balay 
1429e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
143079bdfe76SSatish Balay   *A = 0;
143179bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
143279bdfe76SSatish Balay   PLogObjectCreate(B);
143379bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
143479bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
143579bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
14364c50302cSBarry Smith   MPI_Comm_size(comm,&size);
14374c50302cSBarry Smith   if (size == 1) {
14384c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
14394c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
14404c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
14414c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
14424c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
14434c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
14444c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
14454c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
14464c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
14474c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
14484c50302cSBarry Smith   }
14494c50302cSBarry Smith 
145079bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
145179bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
145290f02eecSBarry Smith   B->mapping    = 0;
145379bdfe76SSatish Balay   B->factor     = 0;
145479bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
145579bdfe76SSatish Balay 
1456e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
145779bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
145879bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
145979bdfe76SSatish Balay 
146079bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1461e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1462e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1463e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1464cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1465cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
146679bdfe76SSatish Balay 
146779bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
146879bdfe76SSatish Balay     work[0] = m; work[1] = n;
146979bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
147079bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
147179bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
147279bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
147379bdfe76SSatish Balay   }
147479bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
147579bdfe76SSatish Balay     Mbs = M/bs;
1476e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
147779bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
147879bdfe76SSatish Balay     m   = mbs*bs;
147979bdfe76SSatish Balay   }
148079bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
148179bdfe76SSatish Balay     Nbs = N/bs;
1482e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
148379bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
148479bdfe76SSatish Balay     n   = nbs*bs;
148579bdfe76SSatish Balay   }
1486e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
148779bdfe76SSatish Balay 
148879bdfe76SSatish Balay   b->m = m; B->m = m;
148979bdfe76SSatish Balay   b->n = n; B->n = n;
149079bdfe76SSatish Balay   b->N = N; B->N = N;
149179bdfe76SSatish Balay   b->M = M; B->M = M;
149279bdfe76SSatish Balay   b->bs  = bs;
149379bdfe76SSatish Balay   b->bs2 = bs*bs;
149479bdfe76SSatish Balay   b->mbs = mbs;
149579bdfe76SSatish Balay   b->nbs = nbs;
149679bdfe76SSatish Balay   b->Mbs = Mbs;
149779bdfe76SSatish Balay   b->Nbs = Nbs;
149879bdfe76SSatish Balay 
149979bdfe76SSatish Balay   /* build local table of row and column ownerships */
150079bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
150179bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15020ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
150379bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
150479bdfe76SSatish Balay   b->rowners[0] = 0;
150579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
150679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
150779bdfe76SSatish Balay   }
150879bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
150979bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15104fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15114fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15124fa0d573SSatish Balay 
151379bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
151479bdfe76SSatish Balay   b->cowners[0] = 0;
151579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
151679bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
151779bdfe76SSatish Balay   }
151879bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
151979bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
15204fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
15214fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
152279bdfe76SSatish Balay 
152379bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
152479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
152579bdfe76SSatish Balay   PLogObjectParent(B,b->A);
152679bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
152779bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
152879bdfe76SSatish Balay   PLogObjectParent(B,b->B);
152979bdfe76SSatish Balay 
153079bdfe76SSatish Balay   /* build cache for off array entries formed */
153179bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
153290f02eecSBarry Smith   b->donotstash  = 0;
153379bdfe76SSatish Balay   b->colmap      = 0;
153479bdfe76SSatish Balay   b->garray      = 0;
153579bdfe76SSatish Balay   b->roworiented = 1;
153679bdfe76SSatish Balay 
153779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
153879bdfe76SSatish Balay   b->lvec      = 0;
153979bdfe76SSatish Balay   b->Mvctx     = 0;
154079bdfe76SSatish Balay 
154179bdfe76SSatish Balay   /* stuff for MatGetRow() */
154279bdfe76SSatish Balay   b->rowindices   = 0;
154379bdfe76SSatish Balay   b->rowvalues    = 0;
154479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
154579bdfe76SSatish Balay 
154679bdfe76SSatish Balay   *A = B;
154779bdfe76SSatish Balay   return 0;
154879bdfe76SSatish Balay }
1549026e39d0SSatish Balay 
15505615d1e5SSatish Balay #undef __FUNC__
15515615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
15520ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
15530ac07820SSatish Balay {
15540ac07820SSatish Balay   Mat         mat;
15550ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
15560ac07820SSatish Balay   int         ierr, len=0, flg;
15570ac07820SSatish Balay 
15580ac07820SSatish Balay   *newmat       = 0;
15590ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
15600ac07820SSatish Balay   PLogObjectCreate(mat);
15610ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
15620ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
15630ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
15640ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
15650ac07820SSatish Balay   mat->factor     = matin->factor;
15660ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
15670ac07820SSatish Balay 
15680ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
15690ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
15700ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
15710ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
15720ac07820SSatish Balay 
15730ac07820SSatish Balay   a->bs  = oldmat->bs;
15740ac07820SSatish Balay   a->bs2 = oldmat->bs2;
15750ac07820SSatish Balay   a->mbs = oldmat->mbs;
15760ac07820SSatish Balay   a->nbs = oldmat->nbs;
15770ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
15780ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
15790ac07820SSatish Balay 
15800ac07820SSatish Balay   a->rstart       = oldmat->rstart;
15810ac07820SSatish Balay   a->rend         = oldmat->rend;
15820ac07820SSatish Balay   a->cstart       = oldmat->cstart;
15830ac07820SSatish Balay   a->cend         = oldmat->cend;
15840ac07820SSatish Balay   a->size         = oldmat->size;
15850ac07820SSatish Balay   a->rank         = oldmat->rank;
1586e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
15870ac07820SSatish Balay   a->rowvalues    = 0;
15880ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
15890ac07820SSatish Balay 
15900ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
15910ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15920ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
15930ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
15940ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15950ac07820SSatish Balay   if (oldmat->colmap) {
15960ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
15970ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
15980ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
15990ac07820SSatish Balay   } else a->colmap = 0;
16000ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16010ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16020ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16030ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16040ac07820SSatish Balay   } else a->garray = 0;
16050ac07820SSatish Balay 
16060ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16070ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16080ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16090ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16100ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16110ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16120ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
16130ac07820SSatish Balay   PLogObjectParent(mat,a->B);
16140ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
16150ac07820SSatish Balay   if (flg) {
16160ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
16170ac07820SSatish Balay   }
16180ac07820SSatish Balay   *newmat = mat;
16190ac07820SSatish Balay   return 0;
16200ac07820SSatish Balay }
162157b952d6SSatish Balay 
162257b952d6SSatish Balay #include "sys.h"
162357b952d6SSatish Balay 
16245615d1e5SSatish Balay #undef __FUNC__
16255615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
162657b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
162757b952d6SSatish Balay {
162857b952d6SSatish Balay   Mat          A;
162957b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
163057b952d6SSatish Balay   Scalar       *vals,*buf;
163157b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
163257b952d6SSatish Balay   MPI_Status   status;
1633cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
163457b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
163557b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
163657b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
163757b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
163857b952d6SSatish Balay 
163957b952d6SSatish Balay 
164057b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
164157b952d6SSatish Balay   bs2  = bs*bs;
164257b952d6SSatish Balay 
164357b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
164457b952d6SSatish Balay   if (!rank) {
164557b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
164657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1647e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
164857b952d6SSatish Balay   }
164957b952d6SSatish Balay 
165057b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
165157b952d6SSatish Balay   M = header[1]; N = header[2];
165257b952d6SSatish Balay 
1653e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
165457b952d6SSatish Balay 
165557b952d6SSatish Balay   /*
165657b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
165757b952d6SSatish Balay      divisible by the blocksize
165857b952d6SSatish Balay   */
165957b952d6SSatish Balay   Mbs        = M/bs;
166057b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
166157b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
166257b952d6SSatish Balay   else                  Mbs++;
166357b952d6SSatish Balay   if (extra_rows &&!rank) {
1664b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
166557b952d6SSatish Balay   }
1666537820f0SBarry Smith 
166757b952d6SSatish Balay   /* determine ownership of all rows */
166857b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
166957b952d6SSatish Balay   m   = mbs * bs;
1670cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1671cee3aa6bSSatish Balay   browners = rowners + size + 1;
167257b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
167357b952d6SSatish Balay   rowners[0] = 0;
1674cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1675cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
167657b952d6SSatish Balay   rstart = rowners[rank];
167757b952d6SSatish Balay   rend   = rowners[rank+1];
167857b952d6SSatish Balay 
167957b952d6SSatish Balay   /* distribute row lengths to all processors */
168057b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
168157b952d6SSatish Balay   if (!rank) {
168257b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
168357b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
168457b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
168557b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1686cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1687cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
168857b952d6SSatish Balay     PetscFree(sndcounts);
168957b952d6SSatish Balay   }
169057b952d6SSatish Balay   else {
169157b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
169257b952d6SSatish Balay   }
169357b952d6SSatish Balay 
169457b952d6SSatish Balay   if (!rank) {
169557b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
169657b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
169757b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
169857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
169957b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
170057b952d6SSatish Balay         procsnz[i] += rowlengths[j];
170157b952d6SSatish Balay       }
170257b952d6SSatish Balay     }
170357b952d6SSatish Balay     PetscFree(rowlengths);
170457b952d6SSatish Balay 
170557b952d6SSatish Balay     /* determine max buffer needed and allocate it */
170657b952d6SSatish Balay     maxnz = 0;
170757b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
170857b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
170957b952d6SSatish Balay     }
171057b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
171157b952d6SSatish Balay 
171257b952d6SSatish Balay     /* read in my part of the matrix column indices  */
171357b952d6SSatish Balay     nz = procsnz[0];
171457b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
171557b952d6SSatish Balay     mycols = ibuf;
1716cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
171757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1718cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1719cee3aa6bSSatish Balay 
172057b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
172157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
172257b952d6SSatish Balay       nz = procsnz[i];
172357b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
172457b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
172557b952d6SSatish Balay     }
172657b952d6SSatish Balay     /* read in the stuff for the last proc */
172757b952d6SSatish Balay     if ( size != 1 ) {
172857b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
172957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
173057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1731cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
173257b952d6SSatish Balay     }
173357b952d6SSatish Balay     PetscFree(cols);
173457b952d6SSatish Balay   }
173557b952d6SSatish Balay   else {
173657b952d6SSatish Balay     /* determine buffer space needed for message */
173757b952d6SSatish Balay     nz = 0;
173857b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
173957b952d6SSatish Balay       nz += locrowlens[i];
174057b952d6SSatish Balay     }
174157b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
174257b952d6SSatish Balay     mycols = ibuf;
174357b952d6SSatish Balay     /* receive message of column indices*/
174457b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
174557b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1746e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
174757b952d6SSatish Balay   }
174857b952d6SSatish Balay 
174957b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1750cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1751cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
175257b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1753cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
175457b952d6SSatish Balay   masked1 = mask    + Mbs;
175557b952d6SSatish Balay   masked2 = masked1 + Mbs;
175657b952d6SSatish Balay   rowcount = 0; nzcount = 0;
175757b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
175857b952d6SSatish Balay     dcount  = 0;
175957b952d6SSatish Balay     odcount = 0;
176057b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
176157b952d6SSatish Balay       kmax = locrowlens[rowcount];
176257b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
176357b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
176457b952d6SSatish Balay         if (!mask[tmp]) {
176557b952d6SSatish Balay           mask[tmp] = 1;
176657b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
176757b952d6SSatish Balay           else masked1[dcount++] = tmp;
176857b952d6SSatish Balay         }
176957b952d6SSatish Balay       }
177057b952d6SSatish Balay       rowcount++;
177157b952d6SSatish Balay     }
1772cee3aa6bSSatish Balay 
177357b952d6SSatish Balay     dlens[i]  = dcount;
177457b952d6SSatish Balay     odlens[i] = odcount;
1775cee3aa6bSSatish Balay 
177657b952d6SSatish Balay     /* zero out the mask elements we set */
177757b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
177857b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
177957b952d6SSatish Balay   }
1780cee3aa6bSSatish Balay 
178157b952d6SSatish Balay   /* create our matrix */
1782537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1783537820f0SBarry Smith          CHKERRQ(ierr);
178457b952d6SSatish Balay   A = *newmat;
17856d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
178657b952d6SSatish Balay 
178757b952d6SSatish Balay   if (!rank) {
178857b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
178957b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
179057b952d6SSatish Balay     nz = procsnz[0];
179157b952d6SSatish Balay     vals = buf;
1792cee3aa6bSSatish Balay     mycols = ibuf;
1793cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
179457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1795cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1796537820f0SBarry Smith 
179757b952d6SSatish Balay     /* insert into matrix */
179857b952d6SSatish Balay     jj      = rstart*bs;
179957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
180057b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
180157b952d6SSatish Balay       mycols += locrowlens[i];
180257b952d6SSatish Balay       vals   += locrowlens[i];
180357b952d6SSatish Balay       jj++;
180457b952d6SSatish Balay     }
180557b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
180657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
180757b952d6SSatish Balay       nz = procsnz[i];
180857b952d6SSatish Balay       vals = buf;
180957b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
181057b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
181157b952d6SSatish Balay     }
181257b952d6SSatish Balay     /* the last proc */
181357b952d6SSatish Balay     if ( size != 1 ){
181457b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1815cee3aa6bSSatish Balay       vals = buf;
181657b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
181757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1818cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
181957b952d6SSatish Balay     }
182057b952d6SSatish Balay     PetscFree(procsnz);
182157b952d6SSatish Balay   }
182257b952d6SSatish Balay   else {
182357b952d6SSatish Balay     /* receive numeric values */
182457b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
182557b952d6SSatish Balay 
182657b952d6SSatish Balay     /* receive message of values*/
182757b952d6SSatish Balay     vals = buf;
1828cee3aa6bSSatish Balay     mycols = ibuf;
182957b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
183057b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1831e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
183257b952d6SSatish Balay 
183357b952d6SSatish Balay     /* insert into matrix */
183457b952d6SSatish Balay     jj      = rstart*bs;
1835cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
183657b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
183757b952d6SSatish Balay       mycols += locrowlens[i];
183857b952d6SSatish Balay       vals   += locrowlens[i];
183957b952d6SSatish Balay       jj++;
184057b952d6SSatish Balay     }
184157b952d6SSatish Balay   }
184257b952d6SSatish Balay   PetscFree(locrowlens);
184357b952d6SSatish Balay   PetscFree(buf);
184457b952d6SSatish Balay   PetscFree(ibuf);
184557b952d6SSatish Balay   PetscFree(rowners);
184657b952d6SSatish Balay   PetscFree(dlens);
1847cee3aa6bSSatish Balay   PetscFree(mask);
18486d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18496d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
185057b952d6SSatish Balay   return 0;
185157b952d6SSatish Balay }
185257b952d6SSatish Balay 
185357b952d6SSatish Balay 
1854