xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ab26458ad20c45ad81c626c7231f19db60fa0092)
179bdfe76SSatish Balay #ifndef lint
2*ab26458aSBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.52 1997/01/27 18:17:11 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
595615d1e5SSatish Balay #undef __FUNC__
605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7180c1aa95SSatish Balay #define CHUNKSIZE  10
7280c1aa95SSatish Balay 
73f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7480c1aa95SSatish Balay { \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay     brow = row/bs;  \
7780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
7880c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
7980c1aa95SSatish Balay       bcol = col/bs; \
8080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
81*ab26458aSBarry Smith       low = 0; high = nrow; \
82*ab26458aSBarry Smith       while (high-low > 3) { \
83*ab26458aSBarry Smith         t = (low+high)/2; \
84*ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
85*ab26458aSBarry Smith         else              low  = t; \
86*ab26458aSBarry Smith       } \
87*ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8880c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8980c1aa95SSatish Balay         if (rp[_i] == bcol) { \
9080c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
91eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
92eada6651SSatish Balay           else                    *bap  = value;  \
9380c1aa95SSatish Balay           goto _noinsert; \
9480c1aa95SSatish Balay         } \
9580c1aa95SSatish Balay       } \
960520107fSSatish Balay       if (a->nonew) goto _noinsert; \
9780c1aa95SSatish Balay       if (nrow >= rmax) { \
9880c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9980c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
10080c1aa95SSatish Balay         Scalar *new_a; \
10180c1aa95SSatish Balay  \
10280c1aa95SSatish Balay         /* malloc new storage space */ \
10380c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10480c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10580c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10680c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10780c1aa95SSatish Balay  \
10880c1aa95SSatish Balay         /* copy over old data into new slots */ \
10980c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
11080c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11180c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11280c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11380c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11480c1aa95SSatish Balay                                                            len*sizeof(int)); \
11580c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11780c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11880c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11980c1aa95SSatish Balay         /* free up old matrix storage */ \
12080c1aa95SSatish Balay         PetscFree(a->a);  \
12180c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12280c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12380c1aa95SSatish Balay         a->singlemalloc = 1; \
12480c1aa95SSatish Balay  \
12580c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
12680c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
12780c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12880c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12980c1aa95SSatish Balay         a->reallocs++; \
13080c1aa95SSatish Balay         a->nz++; \
13180c1aa95SSatish Balay       } \
13280c1aa95SSatish Balay       N = nrow++ - 1;  \
13380c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13480c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13580c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13680c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13780c1aa95SSatish Balay       } \
13880c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13980c1aa95SSatish Balay       rp[_i]                      = bcol;  \
14080c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
14180c1aa95SSatish Balay       _noinsert:; \
14280c1aa95SSatish Balay     ailen[brow] = nrow; \
14380c1aa95SSatish Balay }
14457b952d6SSatish Balay 
145639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1465615d1e5SSatish Balay #undef __FUNC__
1475615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
14857b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14957b952d6SSatish Balay {
15057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
15157b952d6SSatish Balay   Scalar      value;
1524fa0d573SSatish Balay   int         ierr,i,j,row,col;
1534fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1544fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1554fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
15657b952d6SSatish Balay 
157eada6651SSatish Balay   /* Some Variables required in the macro */
15880c1aa95SSatish Balay   Mat         A = baij->A;
15980c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
16080c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
16180c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
16280c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
163*ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
16480c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
16580c1aa95SSatish Balay 
1662c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
16757b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
168e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
16957b952d6SSatish Balay   }
1702c3acbe9SBarry Smith #endif
17157b952d6SSatish Balay   baij->insertmode = addv;
17257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
173639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
174e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
175e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
176639f9d9dSBarry Smith #endif
17757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
17857b952d6SSatish Balay       row = im[i] - rstart_orig;
17957b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
18057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
18157b952d6SSatish Balay           col = in[j] - cstart_orig;
18257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
183f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
18480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
18557b952d6SSatish Balay         }
186639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
187e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
188e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
189639f9d9dSBarry Smith #endif
19057b952d6SSatish Balay         else {
19157b952d6SSatish Balay           if (mat->was_assembled) {
192905e6a2fSBarry Smith             if (!baij->colmap) {
193905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
194905e6a2fSBarry Smith             }
195905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
19657b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
19757b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
19857b952d6SSatish Balay               col =  in[j];
19957b952d6SSatish Balay             }
20057b952d6SSatish Balay           }
20157b952d6SSatish Balay           else col = in[j];
20257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
203639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
20457b952d6SSatish Balay         }
20557b952d6SSatish Balay       }
20657b952d6SSatish Balay     }
20757b952d6SSatish Balay     else {
20890f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20957b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
21057b952d6SSatish Balay       }
21157b952d6SSatish Balay       else {
21290f02eecSBarry Smith         if (!baij->donotstash) {
21357b952d6SSatish Balay           row = im[i];
21457b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
21557b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
21657b952d6SSatish Balay           }
21757b952d6SSatish Balay         }
21857b952d6SSatish Balay       }
21957b952d6SSatish Balay     }
22090f02eecSBarry Smith   }
22157b952d6SSatish Balay   return 0;
22257b952d6SSatish Balay }
22357b952d6SSatish Balay 
224*ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
225*ab26458aSBarry Smith #undef __FUNC__
226*ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
227*ab26458aSBarry Smith static int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
228*ab26458aSBarry Smith {
229*ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
230*ab26458aSBarry Smith   Scalar      *value,*tmp;
231*ab26458aSBarry Smith   int         ierr,i,j,ii,jj,row,col;
232*ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
233*ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
234*ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
235*ab26458aSBarry Smith 
236*ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
237*ab26458aSBarry Smith   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
238*ab26458aSBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
239*ab26458aSBarry Smith   }
240*ab26458aSBarry Smith #endif
241*ab26458aSBarry Smith   /* Should be stashed somewhere to avoid multiple mallocs */
242*ab26458aSBarry Smith   tmp = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(tmp);
243*ab26458aSBarry Smith   if (roworiented) {
244*ab26458aSBarry Smith     stepval = (n-1)*bs;
245*ab26458aSBarry Smith   } else {
246*ab26458aSBarry Smith     stepval = (m-1)*bs;
247*ab26458aSBarry Smith   }
248*ab26458aSBarry Smith   value = (Scalar *)PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(value);
249*ab26458aSBarry Smith   baij->insertmode = addv;
250*ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
251*ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
252*ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
253*ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
254*ab26458aSBarry Smith #endif
255*ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
256*ab26458aSBarry Smith       row = im[i] - rstart;
257*ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
258*ab26458aSBarry Smith         if (in[j] >= cstart && in[j] < cend){
259*ab26458aSBarry Smith           col = in[j] - cstart;
260*ab26458aSBarry Smith           if (roworiented) {
261*ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
262*ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
263*ab26458aSBarry Smith               for (jj=ii; jj<bs2; jj+=bs )
264*ab26458aSBarry Smith                 tmp[jj] = *value++;
265*ab26458aSBarry Smith           } else {
266*ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
267*ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
268*ab26458aSBarry Smith               for (jj=0; jj<bs; jj++ )
269*ab26458aSBarry Smith                 *tmp++  = *value++;
270*ab26458aSBarry Smith             tmp -=bs2;
271*ab26458aSBarry Smith           }
272*ab26458aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,tmp,addv);CHKERRQ(ierr);
273*ab26458aSBarry Smith         }
274*ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
275*ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
276*ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
277*ab26458aSBarry Smith #endif
278*ab26458aSBarry Smith         else {
279*ab26458aSBarry Smith           if (mat->was_assembled) {
280*ab26458aSBarry Smith             if (!baij->colmap) {
281*ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
282*ab26458aSBarry Smith             }
283*ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
284*ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
285*ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
286*ab26458aSBarry Smith               col =  in[j];
287*ab26458aSBarry Smith             }
288*ab26458aSBarry Smith           }
289*ab26458aSBarry Smith           else col = in[j];
290*ab26458aSBarry Smith           if (roworiented) {
291*ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
292*ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
293*ab26458aSBarry Smith               for (jj=ii; jj<bs2; jj+=bs )
294*ab26458aSBarry Smith                 tmp[jj] = *value++;
295*ab26458aSBarry Smith           } else {
296*ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
297*ab26458aSBarry Smith             for ( ii=0; ii<bs; ii++,value+=stepval )
298*ab26458aSBarry Smith               for (jj=0; jj<bs; jj++ )
299*ab26458aSBarry Smith                 *tmp++  = *value++;
300*ab26458aSBarry Smith             tmp -=bs2;
301*ab26458aSBarry Smith           }
302*ab26458aSBarry Smith           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,tmp,addv);CHKERRQ(ierr);
303*ab26458aSBarry Smith         }
304*ab26458aSBarry Smith       }
305*ab26458aSBarry Smith     }
306*ab26458aSBarry Smith     else {
307*ab26458aSBarry Smith       if (!baij->donotstash) {
308*ab26458aSBarry Smith         row = im[i]*bs;
309*ab26458aSBarry Smith         if (roworiented ) {
310*ab26458aSBarry Smith           for ( ii=0; ii<bs; i++,row++ ) {
311*ab26458aSBarry Smith             for ( jj=0; jj<bs; jj++ ) {
312*ab26458aSBarry Smith               ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
313*ab26458aSBarry Smith             }
314*ab26458aSBarry Smith           }
315*ab26458aSBarry Smith         }
316*ab26458aSBarry Smith         else {
317*ab26458aSBarry Smith           row = im[i];
318*ab26458aSBarry Smith 	  for ( j=0; j<n; j++ ) {
319*ab26458aSBarry Smith 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
320*ab26458aSBarry Smith           }
321*ab26458aSBarry Smith         }
322*ab26458aSBarry Smith       }
323*ab26458aSBarry Smith     }
324*ab26458aSBarry Smith   }
325*ab26458aSBarry Smith   PetscFree(tmp);
326*ab26458aSBarry Smith   return 0;
327*ab26458aSBarry Smith }
328*ab26458aSBarry Smith 
3295615d1e5SSatish Balay #undef __FUNC__
3305615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
331d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
332d6de1c52SSatish Balay {
333d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
334d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
335d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
336d6de1c52SSatish Balay 
337d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
338e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
339e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
340d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
341d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
342d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
343e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
344e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
345d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
346d6de1c52SSatish Balay           col = idxn[j] - bscstart;
347d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
348d6de1c52SSatish Balay         }
349d6de1c52SSatish Balay         else {
350905e6a2fSBarry Smith           if (!baij->colmap) {
351905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
352905e6a2fSBarry Smith           }
353e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
354dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
355d9d09a02SSatish Balay           else {
356dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
357d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
358d6de1c52SSatish Balay           }
359d6de1c52SSatish Balay         }
360d6de1c52SSatish Balay       }
361d9d09a02SSatish Balay     }
362d6de1c52SSatish Balay     else {
363e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
364d6de1c52SSatish Balay     }
365d6de1c52SSatish Balay   }
366d6de1c52SSatish Balay   return 0;
367d6de1c52SSatish Balay }
368d6de1c52SSatish Balay 
3695615d1e5SSatish Balay #undef __FUNC__
3705615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
371d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
372d6de1c52SSatish Balay {
373d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
374d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
375acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
376d6de1c52SSatish Balay   double     sum = 0.0;
377d6de1c52SSatish Balay   Scalar     *v;
378d6de1c52SSatish Balay 
379d6de1c52SSatish Balay   if (baij->size == 1) {
380d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
381d6de1c52SSatish Balay   } else {
382d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
383d6de1c52SSatish Balay       v = amat->a;
384d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
385d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
386d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
387d6de1c52SSatish Balay #else
388d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
389d6de1c52SSatish Balay #endif
390d6de1c52SSatish Balay       }
391d6de1c52SSatish Balay       v = bmat->a;
392d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
393d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
394d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
395d6de1c52SSatish Balay #else
396d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
397d6de1c52SSatish Balay #endif
398d6de1c52SSatish Balay       }
399d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
400d6de1c52SSatish Balay       *norm = sqrt(*norm);
401d6de1c52SSatish Balay     }
402acdf5bf4SSatish Balay     else
403e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
404d6de1c52SSatish Balay   }
405d6de1c52SSatish Balay   return 0;
406d6de1c52SSatish Balay }
40757b952d6SSatish Balay 
4085615d1e5SSatish Balay #undef __FUNC__
4095615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
41057b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
41157b952d6SSatish Balay {
41257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
41357b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
41457b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
41557b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
41657b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
41757b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
41857b952d6SSatish Balay   InsertMode  addv;
41957b952d6SSatish Balay   Scalar      *rvalues,*svalues;
42057b952d6SSatish Balay 
42157b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
42257b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
42357b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
424e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
42557b952d6SSatish Balay   }
42657b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
42757b952d6SSatish Balay 
42857b952d6SSatish Balay   /*  first count number of contributors to each processor */
42957b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
43057b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
43157b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
43257b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
43357b952d6SSatish Balay     idx = baij->stash.idx[i];
43457b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
43557b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
43657b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
43757b952d6SSatish Balay       }
43857b952d6SSatish Balay     }
43957b952d6SSatish Balay   }
44057b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
44157b952d6SSatish Balay 
44257b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
44357b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
44457b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
44557b952d6SSatish Balay   nreceives = work[rank];
44657b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
44757b952d6SSatish Balay   nmax = work[rank];
44857b952d6SSatish Balay   PetscFree(work);
44957b952d6SSatish Balay 
45057b952d6SSatish Balay   /* post receives:
45157b952d6SSatish Balay        1) each message will consist of ordered pairs
45257b952d6SSatish Balay      (global index,value) we store the global index as a double
45357b952d6SSatish Balay      to simplify the message passing.
45457b952d6SSatish Balay        2) since we don't know how long each individual message is we
45557b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
45657b952d6SSatish Balay      this is a lot of wasted space.
45757b952d6SSatish Balay 
45857b952d6SSatish Balay 
45957b952d6SSatish Balay        This could be done better.
46057b952d6SSatish Balay   */
46157b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
46257b952d6SSatish Balay   CHKPTRQ(rvalues);
46357b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
46457b952d6SSatish Balay   CHKPTRQ(recv_waits);
46557b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
46657b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
46757b952d6SSatish Balay               comm,recv_waits+i);
46857b952d6SSatish Balay   }
46957b952d6SSatish Balay 
47057b952d6SSatish Balay   /* do sends:
47157b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
47257b952d6SSatish Balay          the ith processor
47357b952d6SSatish Balay   */
47457b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
47557b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
47657b952d6SSatish Balay   CHKPTRQ(send_waits);
47757b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
47857b952d6SSatish Balay   starts[0] = 0;
47957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
48057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
48157b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
48257b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
48357b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
48457b952d6SSatish Balay   }
48557b952d6SSatish Balay   PetscFree(owner);
48657b952d6SSatish Balay   starts[0] = 0;
48757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
48857b952d6SSatish Balay   count = 0;
48957b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
49057b952d6SSatish Balay     if (procs[i]) {
49157b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
49257b952d6SSatish Balay                 comm,send_waits+count++);
49357b952d6SSatish Balay     }
49457b952d6SSatish Balay   }
49557b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
49657b952d6SSatish Balay 
49757b952d6SSatish Balay   /* Free cache space */
498d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
49957b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
50057b952d6SSatish Balay 
50157b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
50257b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
50357b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
50457b952d6SSatish Balay   baij->rmax       = nmax;
50557b952d6SSatish Balay 
50657b952d6SSatish Balay   return 0;
50757b952d6SSatish Balay }
50857b952d6SSatish Balay 
50957b952d6SSatish Balay 
5105615d1e5SSatish Balay #undef __FUNC__
5115615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
51257b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
51357b952d6SSatish Balay {
51457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
51557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
51657b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
51757b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
51857b952d6SSatish Balay   Scalar      *values,val;
51957b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
52057b952d6SSatish Balay 
52157b952d6SSatish Balay   /*  wait on receives */
52257b952d6SSatish Balay   while (count) {
52357b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
52457b952d6SSatish Balay     /* unpack receives into our local space */
52557b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
52657b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
52757b952d6SSatish Balay     n = n/3;
52857b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
52957b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
53057b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
53157b952d6SSatish Balay       val = values[3*i+2];
53257b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
53357b952d6SSatish Balay         col -= baij->cstart*bs;
53457b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
53557b952d6SSatish Balay       }
53657b952d6SSatish Balay       else {
53757b952d6SSatish Balay         if (mat->was_assembled) {
538905e6a2fSBarry Smith           if (!baij->colmap) {
539905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
540905e6a2fSBarry Smith           }
541905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
54257b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
54357b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
54457b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
54557b952d6SSatish Balay           }
54657b952d6SSatish Balay         }
54757b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
54857b952d6SSatish Balay       }
54957b952d6SSatish Balay     }
55057b952d6SSatish Balay     count--;
55157b952d6SSatish Balay   }
55257b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
55357b952d6SSatish Balay 
55457b952d6SSatish Balay   /* wait on sends */
55557b952d6SSatish Balay   if (baij->nsends) {
55657b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
55757b952d6SSatish Balay     CHKPTRQ(send_status);
55857b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
55957b952d6SSatish Balay     PetscFree(send_status);
56057b952d6SSatish Balay   }
56157b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
56257b952d6SSatish Balay 
56357b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
56457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
56557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
56657b952d6SSatish Balay 
56757b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
56857b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
56957b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
57057b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
57157b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
57257b952d6SSatish Balay   }
57357b952d6SSatish Balay 
5746d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
57557b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
57657b952d6SSatish Balay   }
57757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
57857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
57957b952d6SSatish Balay 
58057b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
58157b952d6SSatish Balay   return 0;
58257b952d6SSatish Balay }
58357b952d6SSatish Balay 
5845615d1e5SSatish Balay #undef __FUNC__
5855615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
58657b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
58757b952d6SSatish Balay {
58857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
58957b952d6SSatish Balay   int          ierr;
59057b952d6SSatish Balay 
59157b952d6SSatish Balay   if (baij->size == 1) {
59257b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
59357b952d6SSatish Balay   }
594e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
59557b952d6SSatish Balay   return 0;
59657b952d6SSatish Balay }
59757b952d6SSatish Balay 
5985615d1e5SSatish Balay #undef __FUNC__
5995615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
60057b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
60157b952d6SSatish Balay {
60257b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
603cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
60457b952d6SSatish Balay   FILE         *fd;
60557b952d6SSatish Balay   ViewerType   vtype;
60657b952d6SSatish Balay 
60757b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
60857b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
60957b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
610639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
6114e220ebcSLois Curfman McInnes       MatInfo info;
61257b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
61357b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
6144e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
61557b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
61657b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6174e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6184e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6194e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6204e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6214e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6224e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
62357b952d6SSatish Balay       fflush(fd);
62457b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
62557b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
62657b952d6SSatish Balay       return 0;
62757b952d6SSatish Balay     }
628639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
629bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
63057b952d6SSatish Balay       return 0;
63157b952d6SSatish Balay     }
63257b952d6SSatish Balay   }
63357b952d6SSatish Balay 
63457b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
63557b952d6SSatish Balay     Draw       draw;
63657b952d6SSatish Balay     PetscTruth isnull;
63757b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
63857b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
63957b952d6SSatish Balay   }
64057b952d6SSatish Balay 
64157b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
64257b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
64357b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
64457b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
64557b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
64657b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
64757b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
64857b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
64957b952d6SSatish Balay     fflush(fd);
65057b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
65157b952d6SSatish Balay   }
65257b952d6SSatish Balay   else {
65357b952d6SSatish Balay     int size = baij->size;
65457b952d6SSatish Balay     rank = baij->rank;
65557b952d6SSatish Balay     if (size == 1) {
65657b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
65757b952d6SSatish Balay     }
65857b952d6SSatish Balay     else {
65957b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
66057b952d6SSatish Balay       Mat         A;
66157b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
66257b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
66357b952d6SSatish Balay       int         mbs=baij->mbs;
66457b952d6SSatish Balay       Scalar      *a;
66557b952d6SSatish Balay 
66657b952d6SSatish Balay       if (!rank) {
667cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
66857b952d6SSatish Balay         CHKERRQ(ierr);
66957b952d6SSatish Balay       }
67057b952d6SSatish Balay       else {
671cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
67257b952d6SSatish Balay         CHKERRQ(ierr);
67357b952d6SSatish Balay       }
67457b952d6SSatish Balay       PLogObjectParent(mat,A);
67557b952d6SSatish Balay 
67657b952d6SSatish Balay       /* copy over the A part */
67757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
67857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67957b952d6SSatish Balay       row = baij->rstart;
68057b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
68157b952d6SSatish Balay 
68257b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
68357b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
68457b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
68557b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
68657b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
68757b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
688cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
689cee3aa6bSSatish Balay             col++; a += bs;
69057b952d6SSatish Balay           }
69157b952d6SSatish Balay         }
69257b952d6SSatish Balay       }
69357b952d6SSatish Balay       /* copy over the B part */
69457b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
69557b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
69657b952d6SSatish Balay       row = baij->rstart*bs;
69757b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
69857b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
69957b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
70057b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
70157b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
70257b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
703cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
704cee3aa6bSSatish Balay             col++; a += bs;
70557b952d6SSatish Balay           }
70657b952d6SSatish Balay         }
70757b952d6SSatish Balay       }
70857b952d6SSatish Balay       PetscFree(rvals);
7096d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
7106d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
71157b952d6SSatish Balay       if (!rank) {
71257b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
71357b952d6SSatish Balay       }
71457b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
71557b952d6SSatish Balay     }
71657b952d6SSatish Balay   }
71757b952d6SSatish Balay   return 0;
71857b952d6SSatish Balay }
71957b952d6SSatish Balay 
72057b952d6SSatish Balay 
72157b952d6SSatish Balay 
7225615d1e5SSatish Balay #undef __FUNC__
7235615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
72457b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
72557b952d6SSatish Balay {
72657b952d6SSatish Balay   Mat         mat = (Mat) obj;
72757b952d6SSatish Balay   int         ierr;
72857b952d6SSatish Balay   ViewerType  vtype;
72957b952d6SSatish Balay 
73057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
73157b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
73257b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
73357b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
73457b952d6SSatish Balay   }
73557b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
73657b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
73757b952d6SSatish Balay   }
73857b952d6SSatish Balay   return 0;
73957b952d6SSatish Balay }
74057b952d6SSatish Balay 
7415615d1e5SSatish Balay #undef __FUNC__
7425615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
74379bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
74479bdfe76SSatish Balay {
74579bdfe76SSatish Balay   Mat         mat = (Mat) obj;
74679bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
74779bdfe76SSatish Balay   int         ierr;
74879bdfe76SSatish Balay 
74979bdfe76SSatish Balay #if defined(PETSC_LOG)
75079bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
75179bdfe76SSatish Balay #endif
75279bdfe76SSatish Balay 
75379bdfe76SSatish Balay   PetscFree(baij->rowners);
75479bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
75579bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
75679bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
75779bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
75879bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
75979bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
76079bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
76179bdfe76SSatish Balay   PetscFree(baij);
76290f02eecSBarry Smith   if (mat->mapping) {
76390f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
76490f02eecSBarry Smith   }
76579bdfe76SSatish Balay   PLogObjectDestroy(mat);
76679bdfe76SSatish Balay   PetscHeaderDestroy(mat);
76779bdfe76SSatish Balay   return 0;
76879bdfe76SSatish Balay }
76979bdfe76SSatish Balay 
7705615d1e5SSatish Balay #undef __FUNC__
7715615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
772cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
773cee3aa6bSSatish Balay {
774cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
77547b4a8eaSLois Curfman McInnes   int         ierr, nt;
776cee3aa6bSSatish Balay 
777c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
77847b4a8eaSLois Curfman McInnes   if (nt != a->n) {
779*ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
78047b4a8eaSLois Curfman McInnes   }
781c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
78247b4a8eaSLois Curfman McInnes   if (nt != a->m) {
783e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
78447b4a8eaSLois Curfman McInnes   }
78543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
786cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
78743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
788cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
78943a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
790cee3aa6bSSatish Balay   return 0;
791cee3aa6bSSatish Balay }
792cee3aa6bSSatish Balay 
7935615d1e5SSatish Balay #undef __FUNC__
7945615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
795cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
796cee3aa6bSSatish Balay {
797cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
798cee3aa6bSSatish Balay   int        ierr;
79943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
800cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
80143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
802cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
803cee3aa6bSSatish Balay   return 0;
804cee3aa6bSSatish Balay }
805cee3aa6bSSatish Balay 
8065615d1e5SSatish Balay #undef __FUNC__
8075615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
808cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
809cee3aa6bSSatish Balay {
810cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
811cee3aa6bSSatish Balay   int        ierr;
812cee3aa6bSSatish Balay 
813cee3aa6bSSatish Balay   /* do nondiagonal part */
814cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
815cee3aa6bSSatish Balay   /* send it on its way */
816537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
817cee3aa6bSSatish Balay   /* do local part */
818cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
819cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
820cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
821cee3aa6bSSatish Balay   /* but is not perhaps always true. */
822639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
823cee3aa6bSSatish Balay   return 0;
824cee3aa6bSSatish Balay }
825cee3aa6bSSatish Balay 
8265615d1e5SSatish Balay #undef __FUNC__
8275615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
828cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
829cee3aa6bSSatish Balay {
830cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
831cee3aa6bSSatish Balay   int        ierr;
832cee3aa6bSSatish Balay 
833cee3aa6bSSatish Balay   /* do nondiagonal part */
834cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
835cee3aa6bSSatish Balay   /* send it on its way */
836537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
837cee3aa6bSSatish Balay   /* do local part */
838cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
839cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
840cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
841cee3aa6bSSatish Balay   /* but is not perhaps always true. */
842537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
843cee3aa6bSSatish Balay   return 0;
844cee3aa6bSSatish Balay }
845cee3aa6bSSatish Balay 
846cee3aa6bSSatish Balay /*
847cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
848cee3aa6bSSatish Balay    diagonal block
849cee3aa6bSSatish Balay */
8505615d1e5SSatish Balay #undef __FUNC__
8515615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
852cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
853cee3aa6bSSatish Balay {
854cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
855cee3aa6bSSatish Balay   if (a->M != a->N)
856e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
857cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
858cee3aa6bSSatish Balay }
859cee3aa6bSSatish Balay 
8605615d1e5SSatish Balay #undef __FUNC__
8615615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
862cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
863cee3aa6bSSatish Balay {
864cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
865cee3aa6bSSatish Balay   int        ierr;
866cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
867cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
868cee3aa6bSSatish Balay   return 0;
869cee3aa6bSSatish Balay }
870026e39d0SSatish Balay 
8715615d1e5SSatish Balay #undef __FUNC__
8725615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
87357b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
87457b952d6SSatish Balay {
87557b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
87657b952d6SSatish Balay   *m = mat->M; *n = mat->N;
87757b952d6SSatish Balay   return 0;
87857b952d6SSatish Balay }
87957b952d6SSatish Balay 
8805615d1e5SSatish Balay #undef __FUNC__
8815615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
88257b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
88357b952d6SSatish Balay {
88457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
88557b952d6SSatish Balay   *m = mat->m; *n = mat->N;
88657b952d6SSatish Balay   return 0;
88757b952d6SSatish Balay }
88857b952d6SSatish Balay 
8895615d1e5SSatish Balay #undef __FUNC__
8905615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
89157b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
89257b952d6SSatish Balay {
89357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
89457b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
89557b952d6SSatish Balay   return 0;
89657b952d6SSatish Balay }
89757b952d6SSatish Balay 
898acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
899acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
900acdf5bf4SSatish Balay 
9015615d1e5SSatish Balay #undef __FUNC__
9025615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
903acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
904acdf5bf4SSatish Balay {
905acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
906acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
907acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
908d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
909d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
910acdf5bf4SSatish Balay 
911e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
912acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
913acdf5bf4SSatish Balay 
914acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
915acdf5bf4SSatish Balay     /*
916acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
917acdf5bf4SSatish Balay     */
918acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
919bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
920bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
921acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
922acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
923acdf5bf4SSatish Balay     }
924acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
925acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
926acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
927acdf5bf4SSatish Balay   }
928acdf5bf4SSatish Balay 
929acdf5bf4SSatish Balay 
930e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
931d9d09a02SSatish Balay   lrow = row - brstart;
932acdf5bf4SSatish Balay 
933acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
934acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
935acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
936acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
937acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
938acdf5bf4SSatish Balay   nztot = nzA + nzB;
939acdf5bf4SSatish Balay 
940acdf5bf4SSatish Balay   cmap  = mat->garray;
941acdf5bf4SSatish Balay   if (v  || idx) {
942acdf5bf4SSatish Balay     if (nztot) {
943acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
944acdf5bf4SSatish Balay       int imark = -1;
945acdf5bf4SSatish Balay       if (v) {
946acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
947acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
948d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
949acdf5bf4SSatish Balay           else break;
950acdf5bf4SSatish Balay         }
951acdf5bf4SSatish Balay         imark = i;
952acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
953acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
954acdf5bf4SSatish Balay       }
955acdf5bf4SSatish Balay       if (idx) {
956acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
957acdf5bf4SSatish Balay         if (imark > -1) {
958acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
959bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
960acdf5bf4SSatish Balay           }
961acdf5bf4SSatish Balay         } else {
962acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
963d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
964d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
965acdf5bf4SSatish Balay             else break;
966acdf5bf4SSatish Balay           }
967acdf5bf4SSatish Balay           imark = i;
968acdf5bf4SSatish Balay         }
969d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
970d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
971acdf5bf4SSatish Balay       }
972acdf5bf4SSatish Balay     }
973d212a18eSSatish Balay     else {
974d212a18eSSatish Balay       if (idx) *idx = 0;
975d212a18eSSatish Balay       if (v)   *v   = 0;
976d212a18eSSatish Balay     }
977acdf5bf4SSatish Balay   }
978acdf5bf4SSatish Balay   *nz = nztot;
979acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
980acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
981acdf5bf4SSatish Balay   return 0;
982acdf5bf4SSatish Balay }
983acdf5bf4SSatish Balay 
9845615d1e5SSatish Balay #undef __FUNC__
9855615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
986acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
987acdf5bf4SSatish Balay {
988acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
989acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
990e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
991acdf5bf4SSatish Balay   }
992acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
993acdf5bf4SSatish Balay   return 0;
994acdf5bf4SSatish Balay }
995acdf5bf4SSatish Balay 
9965615d1e5SSatish Balay #undef __FUNC__
9975615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
9985a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
9995a838052SSatish Balay {
10005a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
10015a838052SSatish Balay   *bs = baij->bs;
10025a838052SSatish Balay   return 0;
10035a838052SSatish Balay }
10045a838052SSatish Balay 
10055615d1e5SSatish Balay #undef __FUNC__
10065615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
100758667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
100858667388SSatish Balay {
100958667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
101058667388SSatish Balay   int         ierr;
101158667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
101258667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
101358667388SSatish Balay   return 0;
101458667388SSatish Balay }
10150ac07820SSatish Balay 
10165615d1e5SSatish Balay #undef __FUNC__
10175615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
10184e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10190ac07820SSatish Balay {
10204e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10214e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10227d57db60SLois Curfman McInnes   int         ierr;
10237d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10240ac07820SSatish Balay 
10254e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10264e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10274e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10284e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10294e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10304e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10314e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
10324e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10334e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
10340ac07820SSatish Balay   if (flag == MAT_LOCAL) {
10354e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10364e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10374e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10384e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10394e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
10400ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
10410ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
10424e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10434e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10444e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10454e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10464e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10470ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
10480ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
10494e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10504e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10514e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10524e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10534e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10540ac07820SSatish Balay   }
10554e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10564e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10574e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10580ac07820SSatish Balay   return 0;
10590ac07820SSatish Balay }
10600ac07820SSatish Balay 
10615615d1e5SSatish Balay #undef __FUNC__
10625615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
106358667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
106458667388SSatish Balay {
106558667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
106658667388SSatish Balay 
106758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
106858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10696da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1070b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
1071b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1072b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1073b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1074aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
107558667388SSatish Balay         MatSetOption(a->A,op);
107658667388SSatish Balay         MatSetOption(a->B,op);
1077b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10786da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
107958667388SSatish Balay              op == MAT_SYMMETRIC ||
108058667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
108158667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
108258667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
108358667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
108458667388SSatish Balay     a->roworiented = 0;
108558667388SSatish Balay     MatSetOption(a->A,op);
108658667388SSatish Balay     MatSetOption(a->B,op);
108790f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
108890f02eecSBarry Smith     a->donotstash = 1;
108990f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1090e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
109158667388SSatish Balay   else
1092e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
109358667388SSatish Balay   return 0;
109458667388SSatish Balay }
109558667388SSatish Balay 
10965615d1e5SSatish Balay #undef __FUNC__
10975615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
10980ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
10990ac07820SSatish Balay {
11000ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
11010ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
11020ac07820SSatish Balay   Mat        B;
11030ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
11040ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
11050ac07820SSatish Balay   Scalar     *a;
11060ac07820SSatish Balay 
11070ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1108e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
11090ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
11100ac07820SSatish Balay   CHKERRQ(ierr);
11110ac07820SSatish Balay 
11120ac07820SSatish Balay   /* copy over the A part */
11130ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
11140ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11150ac07820SSatish Balay   row = baij->rstart;
11160ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11170ac07820SSatish Balay 
11180ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11190ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11200ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11210ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11220ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11230ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11240ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11250ac07820SSatish Balay         col++; a += bs;
11260ac07820SSatish Balay       }
11270ac07820SSatish Balay     }
11280ac07820SSatish Balay   }
11290ac07820SSatish Balay   /* copy over the B part */
11300ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
11310ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11320ac07820SSatish Balay   row = baij->rstart*bs;
11330ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11340ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11350ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11360ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11370ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
11380ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11390ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11400ac07820SSatish Balay         col++; a += bs;
11410ac07820SSatish Balay       }
11420ac07820SSatish Balay     }
11430ac07820SSatish Balay   }
11440ac07820SSatish Balay   PetscFree(rvals);
11450ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11460ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11470ac07820SSatish Balay 
11480ac07820SSatish Balay   if (matout != PETSC_NULL) {
11490ac07820SSatish Balay     *matout = B;
11500ac07820SSatish Balay   } else {
11510ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
11520ac07820SSatish Balay     PetscFree(baij->rowners);
11530ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
11540ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
11550ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
11560ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
11570ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
11580ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
11590ac07820SSatish Balay     PetscFree(baij);
11600ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
11610ac07820SSatish Balay     PetscHeaderDestroy(B);
11620ac07820SSatish Balay   }
11630ac07820SSatish Balay   return 0;
11640ac07820SSatish Balay }
11650e95ebc0SSatish Balay 
11665615d1e5SSatish Balay #undef __FUNC__
11675615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
11680e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
11690e95ebc0SSatish Balay {
11700e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
11710e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
11720e95ebc0SSatish Balay   int ierr,s1,s2,s3;
11730e95ebc0SSatish Balay 
11740e95ebc0SSatish Balay   if (ll)  {
11750e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
11760e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1177e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
11780e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
11790e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
11800e95ebc0SSatish Balay   }
1181e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
11820e95ebc0SSatish Balay   return 0;
11830e95ebc0SSatish Balay }
11840e95ebc0SSatish Balay 
11850ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
11860ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
11870ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
11880ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
11890ac07820SSatish Balay    routine.
11900ac07820SSatish Balay */
11915615d1e5SSatish Balay #undef __FUNC__
11925615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
11930ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
11940ac07820SSatish Balay {
11950ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
11960ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
11970ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
11980ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
11990ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
12000ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
12010ac07820SSatish Balay   MPI_Comm       comm = A->comm;
12020ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
12030ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
12040ac07820SSatish Balay   IS             istmp;
12050ac07820SSatish Balay 
12060ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
12070ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
12080ac07820SSatish Balay 
12090ac07820SSatish Balay   /*  first count number of contributors to each processor */
12100ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
12110ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
12120ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
12130ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12140ac07820SSatish Balay     idx = rows[i];
12150ac07820SSatish Balay     found = 0;
12160ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12170ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12180ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12190ac07820SSatish Balay       }
12200ac07820SSatish Balay     }
1221e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12220ac07820SSatish Balay   }
12230ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12240ac07820SSatish Balay 
12250ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12260ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12270ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12280ac07820SSatish Balay   nrecvs = work[rank];
12290ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
12300ac07820SSatish Balay   nmax = work[rank];
12310ac07820SSatish Balay   PetscFree(work);
12320ac07820SSatish Balay 
12330ac07820SSatish Balay   /* post receives:   */
12340ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
12350ac07820SSatish Balay   CHKPTRQ(rvalues);
12360ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
12370ac07820SSatish Balay   CHKPTRQ(recv_waits);
12380ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12390ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
12400ac07820SSatish Balay   }
12410ac07820SSatish Balay 
12420ac07820SSatish Balay   /* do sends:
12430ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
12440ac07820SSatish Balay          the ith processor
12450ac07820SSatish Balay   */
12460ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
12470ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
12480ac07820SSatish Balay   CHKPTRQ(send_waits);
12490ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
12500ac07820SSatish Balay   starts[0] = 0;
12510ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12520ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12530ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
12540ac07820SSatish Balay   }
12550ac07820SSatish Balay   ISRestoreIndices(is,&rows);
12560ac07820SSatish Balay 
12570ac07820SSatish Balay   starts[0] = 0;
12580ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12590ac07820SSatish Balay   count = 0;
12600ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
12610ac07820SSatish Balay     if (procs[i]) {
12620ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
12630ac07820SSatish Balay     }
12640ac07820SSatish Balay   }
12650ac07820SSatish Balay   PetscFree(starts);
12660ac07820SSatish Balay 
12670ac07820SSatish Balay   base = owners[rank]*bs;
12680ac07820SSatish Balay 
12690ac07820SSatish Balay   /*  wait on receives */
12700ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
12710ac07820SSatish Balay   source = lens + nrecvs;
12720ac07820SSatish Balay   count  = nrecvs; slen = 0;
12730ac07820SSatish Balay   while (count) {
12740ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
12750ac07820SSatish Balay     /* unpack receives into our local space */
12760ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
12770ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
12780ac07820SSatish Balay     lens[imdex]  = n;
12790ac07820SSatish Balay     slen += n;
12800ac07820SSatish Balay     count--;
12810ac07820SSatish Balay   }
12820ac07820SSatish Balay   PetscFree(recv_waits);
12830ac07820SSatish Balay 
12840ac07820SSatish Balay   /* move the data into the send scatter */
12850ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
12860ac07820SSatish Balay   count = 0;
12870ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12880ac07820SSatish Balay     values = rvalues + i*nmax;
12890ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
12900ac07820SSatish Balay       lrows[count++] = values[j] - base;
12910ac07820SSatish Balay     }
12920ac07820SSatish Balay   }
12930ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
12940ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
12950ac07820SSatish Balay 
12960ac07820SSatish Balay   /* actually zap the local rows */
1297537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
12980ac07820SSatish Balay   PLogObjectParent(A,istmp);
12990ac07820SSatish Balay   PetscFree(lrows);
13000ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
13010ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
13020ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
13030ac07820SSatish Balay 
13040ac07820SSatish Balay   /* wait on sends */
13050ac07820SSatish Balay   if (nsends) {
13060ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
13070ac07820SSatish Balay     CHKPTRQ(send_status);
13080ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
13090ac07820SSatish Balay     PetscFree(send_status);
13100ac07820SSatish Balay   }
13110ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
13120ac07820SSatish Balay 
13130ac07820SSatish Balay   return 0;
13140ac07820SSatish Balay }
1315ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13165615d1e5SSatish Balay #undef __FUNC__
13175615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1318ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1319ba4ca20aSSatish Balay {
1320ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1321ba4ca20aSSatish Balay 
1322ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1323ba4ca20aSSatish Balay   else return 0;
1324ba4ca20aSSatish Balay }
13250ac07820SSatish Balay 
13265615d1e5SSatish Balay #undef __FUNC__
13275615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1328bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A)
1329bb5a7306SBarry Smith {
1330bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1331bb5a7306SBarry Smith   int         ierr;
1332bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1333bb5a7306SBarry Smith   return 0;
1334bb5a7306SBarry Smith }
1335bb5a7306SBarry Smith 
13360ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
13370ac07820SSatish Balay 
133879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
133979bdfe76SSatish Balay static struct _MatOps MatOps = {
1340bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
13414c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
13424c50302cSBarry Smith   0,0,0,0,
13430ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
13440e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
134558667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
13464c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
13474c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
13484c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
134994a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1350d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1351ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1352bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1353*ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
135479bdfe76SSatish Balay 
135579bdfe76SSatish Balay 
13565615d1e5SSatish Balay #undef __FUNC__
13575615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
135879bdfe76SSatish Balay /*@C
135979bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
136079bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
136179bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
136279bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
136379bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
136479bdfe76SSatish Balay 
136579bdfe76SSatish Balay    Input Parameters:
136679bdfe76SSatish Balay .  comm - MPI communicator
136779bdfe76SSatish Balay .  bs   - size of blockk
136879bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
136992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
137092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
137192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
137292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
137392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
137479bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
137592e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
137679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
137779bdfe76SSatish Balay            submatrix  (same for all local rows)
137892e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
137992e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
138092e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
138192e8d321SLois Curfman McInnes            it is zero.
138292e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
138379bdfe76SSatish Balay            submatrix (same for all local rows).
138492e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
138592e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
138692e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
138779bdfe76SSatish Balay 
138879bdfe76SSatish Balay    Output Parameter:
138979bdfe76SSatish Balay .  A - the matrix
139079bdfe76SSatish Balay 
139179bdfe76SSatish Balay    Notes:
139279bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
139379bdfe76SSatish Balay    (possibly both).
139479bdfe76SSatish Balay 
139579bdfe76SSatish Balay    Storage Information:
139679bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
139779bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
139879bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
139979bdfe76SSatish Balay    local matrix (a rectangular submatrix).
140079bdfe76SSatish Balay 
140179bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
140279bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
140379bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
140479bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
140579bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
140679bdfe76SSatish Balay 
140779bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
140879bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
140979bdfe76SSatish Balay 
141079bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
141179bdfe76SSatish Balay $         -------------------
141279bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
141379bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
141479bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
141579bdfe76SSatish Balay $         -------------------
141679bdfe76SSatish Balay $
141779bdfe76SSatish Balay 
141879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
141979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
142079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
142157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
142279bdfe76SSatish Balay 
142379bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
142479bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
142579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
142692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
142792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14286da5968aSLois Curfman McInnes    matrices.
142979bdfe76SSatish Balay 
143092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
143179bdfe76SSatish Balay 
143279bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
143379bdfe76SSatish Balay @*/
143479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
143579bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
143679bdfe76SSatish Balay {
143779bdfe76SSatish Balay   Mat          B;
143879bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
14394c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
144079bdfe76SSatish Balay 
1441e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
144279bdfe76SSatish Balay   *A = 0;
144379bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
144479bdfe76SSatish Balay   PLogObjectCreate(B);
144579bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
144679bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
144779bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
14484c50302cSBarry Smith   MPI_Comm_size(comm,&size);
14494c50302cSBarry Smith   if (size == 1) {
14504c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
14514c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
14524c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
14534c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
14544c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
14554c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
14564c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
14574c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
14584c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
14594c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
14604c50302cSBarry Smith   }
14614c50302cSBarry Smith 
146279bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
146379bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
146490f02eecSBarry Smith   B->mapping    = 0;
146579bdfe76SSatish Balay   B->factor     = 0;
146679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
146779bdfe76SSatish Balay 
146879bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
146979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
147079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
147179bdfe76SSatish Balay 
147279bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1473e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1474e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1475e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1476cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1477cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
147879bdfe76SSatish Balay 
147979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
148079bdfe76SSatish Balay     work[0] = m; work[1] = n;
148179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
148279bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
148379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
148479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
148579bdfe76SSatish Balay   }
148679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
148779bdfe76SSatish Balay     Mbs = M/bs;
1488e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
148979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
149079bdfe76SSatish Balay     m   = mbs*bs;
149179bdfe76SSatish Balay   }
149279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
149379bdfe76SSatish Balay     Nbs = N/bs;
1494e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
149579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
149679bdfe76SSatish Balay     n   = nbs*bs;
149779bdfe76SSatish Balay   }
1498e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
149979bdfe76SSatish Balay 
150079bdfe76SSatish Balay   b->m = m; B->m = m;
150179bdfe76SSatish Balay   b->n = n; B->n = n;
150279bdfe76SSatish Balay   b->N = N; B->N = N;
150379bdfe76SSatish Balay   b->M = M; B->M = M;
150479bdfe76SSatish Balay   b->bs  = bs;
150579bdfe76SSatish Balay   b->bs2 = bs*bs;
150679bdfe76SSatish Balay   b->mbs = mbs;
150779bdfe76SSatish Balay   b->nbs = nbs;
150879bdfe76SSatish Balay   b->Mbs = Mbs;
150979bdfe76SSatish Balay   b->Nbs = Nbs;
151079bdfe76SSatish Balay 
151179bdfe76SSatish Balay   /* build local table of row and column ownerships */
151279bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
151379bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15140ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
151579bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
151679bdfe76SSatish Balay   b->rowners[0] = 0;
151779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
151879bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
151979bdfe76SSatish Balay   }
152079bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
152179bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15224fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15234fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15244fa0d573SSatish Balay 
152579bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
152679bdfe76SSatish Balay   b->cowners[0] = 0;
152779bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
152879bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
152979bdfe76SSatish Balay   }
153079bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
153179bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
15324fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
15334fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
153479bdfe76SSatish Balay 
153579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
153679bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
153779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
153879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
153979bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
154079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
154179bdfe76SSatish Balay 
154279bdfe76SSatish Balay   /* build cache for off array entries formed */
154379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
154490f02eecSBarry Smith   b->donotstash  = 0;
154579bdfe76SSatish Balay   b->colmap      = 0;
154679bdfe76SSatish Balay   b->garray      = 0;
154779bdfe76SSatish Balay   b->roworiented = 1;
154879bdfe76SSatish Balay 
154979bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
155079bdfe76SSatish Balay   b->lvec      = 0;
155179bdfe76SSatish Balay   b->Mvctx     = 0;
155279bdfe76SSatish Balay 
155379bdfe76SSatish Balay   /* stuff for MatGetRow() */
155479bdfe76SSatish Balay   b->rowindices   = 0;
155579bdfe76SSatish Balay   b->rowvalues    = 0;
155679bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
155779bdfe76SSatish Balay 
155879bdfe76SSatish Balay   *A = B;
155979bdfe76SSatish Balay   return 0;
156079bdfe76SSatish Balay }
1561026e39d0SSatish Balay 
15625615d1e5SSatish Balay #undef __FUNC__
15635615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
15640ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
15650ac07820SSatish Balay {
15660ac07820SSatish Balay   Mat        mat;
15670ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
15680ac07820SSatish Balay   int        ierr, len=0, flg;
15690ac07820SSatish Balay 
15700ac07820SSatish Balay   *newmat       = 0;
15710ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
15720ac07820SSatish Balay   PLogObjectCreate(mat);
15730ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
15740ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
15750ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
15760ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
15770ac07820SSatish Balay   mat->factor     = matin->factor;
15780ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
15790ac07820SSatish Balay 
15800ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
15810ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
15820ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
15830ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
15840ac07820SSatish Balay 
15850ac07820SSatish Balay   a->bs  = oldmat->bs;
15860ac07820SSatish Balay   a->bs2 = oldmat->bs2;
15870ac07820SSatish Balay   a->mbs = oldmat->mbs;
15880ac07820SSatish Balay   a->nbs = oldmat->nbs;
15890ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
15900ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
15910ac07820SSatish Balay 
15920ac07820SSatish Balay   a->rstart       = oldmat->rstart;
15930ac07820SSatish Balay   a->rend         = oldmat->rend;
15940ac07820SSatish Balay   a->cstart       = oldmat->cstart;
15950ac07820SSatish Balay   a->cend         = oldmat->cend;
15960ac07820SSatish Balay   a->size         = oldmat->size;
15970ac07820SSatish Balay   a->rank         = oldmat->rank;
15980ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
15990ac07820SSatish Balay   a->rowvalues    = 0;
16000ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
16010ac07820SSatish Balay 
16020ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
16030ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
16040ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
16050ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
16060ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
16070ac07820SSatish Balay   if (oldmat->colmap) {
16080ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
16090ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
16100ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16110ac07820SSatish Balay   } else a->colmap = 0;
16120ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16130ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16140ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16150ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16160ac07820SSatish Balay   } else a->garray = 0;
16170ac07820SSatish Balay 
16180ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16190ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16200ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16210ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16220ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16230ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16240ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
16250ac07820SSatish Balay   PLogObjectParent(mat,a->B);
16260ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
16270ac07820SSatish Balay   if (flg) {
16280ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
16290ac07820SSatish Balay   }
16300ac07820SSatish Balay   *newmat = mat;
16310ac07820SSatish Balay   return 0;
16320ac07820SSatish Balay }
163357b952d6SSatish Balay 
163457b952d6SSatish Balay #include "sys.h"
163557b952d6SSatish Balay 
16365615d1e5SSatish Balay #undef __FUNC__
16375615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
163857b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
163957b952d6SSatish Balay {
164057b952d6SSatish Balay   Mat          A;
164157b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
164257b952d6SSatish Balay   Scalar       *vals,*buf;
164357b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
164457b952d6SSatish Balay   MPI_Status   status;
1645cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
164657b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
164757b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
164857b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
164957b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
165057b952d6SSatish Balay 
165157b952d6SSatish Balay 
165257b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
165357b952d6SSatish Balay   bs2  = bs*bs;
165457b952d6SSatish Balay 
165557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
165657b952d6SSatish Balay   if (!rank) {
165757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
165857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1659e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
166057b952d6SSatish Balay   }
166157b952d6SSatish Balay 
166257b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
166357b952d6SSatish Balay   M = header[1]; N = header[2];
166457b952d6SSatish Balay 
1665e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
166657b952d6SSatish Balay 
166757b952d6SSatish Balay   /*
166857b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
166957b952d6SSatish Balay      divisible by the blocksize
167057b952d6SSatish Balay   */
167157b952d6SSatish Balay   Mbs        = M/bs;
167257b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
167357b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
167457b952d6SSatish Balay   else                  Mbs++;
167557b952d6SSatish Balay   if (extra_rows &&!rank) {
1676b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
167757b952d6SSatish Balay   }
1678537820f0SBarry Smith 
167957b952d6SSatish Balay   /* determine ownership of all rows */
168057b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
168157b952d6SSatish Balay   m   = mbs * bs;
1682cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1683cee3aa6bSSatish Balay   browners = rowners + size + 1;
168457b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
168557b952d6SSatish Balay   rowners[0] = 0;
1686cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1687cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
168857b952d6SSatish Balay   rstart = rowners[rank];
168957b952d6SSatish Balay   rend   = rowners[rank+1];
169057b952d6SSatish Balay 
169157b952d6SSatish Balay   /* distribute row lengths to all processors */
169257b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
169357b952d6SSatish Balay   if (!rank) {
169457b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
169557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
169657b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
169757b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1698cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1699cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
170057b952d6SSatish Balay     PetscFree(sndcounts);
170157b952d6SSatish Balay   }
170257b952d6SSatish Balay   else {
170357b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
170457b952d6SSatish Balay   }
170557b952d6SSatish Balay 
170657b952d6SSatish Balay   if (!rank) {
170757b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
170857b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
170957b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
171057b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
171157b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
171257b952d6SSatish Balay         procsnz[i] += rowlengths[j];
171357b952d6SSatish Balay       }
171457b952d6SSatish Balay     }
171557b952d6SSatish Balay     PetscFree(rowlengths);
171657b952d6SSatish Balay 
171757b952d6SSatish Balay     /* determine max buffer needed and allocate it */
171857b952d6SSatish Balay     maxnz = 0;
171957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
172057b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
172157b952d6SSatish Balay     }
172257b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
172357b952d6SSatish Balay 
172457b952d6SSatish Balay     /* read in my part of the matrix column indices  */
172557b952d6SSatish Balay     nz = procsnz[0];
172657b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
172757b952d6SSatish Balay     mycols = ibuf;
1728cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
172957b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1730cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1731cee3aa6bSSatish Balay 
173257b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
173357b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
173457b952d6SSatish Balay       nz = procsnz[i];
173557b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
173657b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
173757b952d6SSatish Balay     }
173857b952d6SSatish Balay     /* read in the stuff for the last proc */
173957b952d6SSatish Balay     if ( size != 1 ) {
174057b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
174157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
174257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1743cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
174457b952d6SSatish Balay     }
174557b952d6SSatish Balay     PetscFree(cols);
174657b952d6SSatish Balay   }
174757b952d6SSatish Balay   else {
174857b952d6SSatish Balay     /* determine buffer space needed for message */
174957b952d6SSatish Balay     nz = 0;
175057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
175157b952d6SSatish Balay       nz += locrowlens[i];
175257b952d6SSatish Balay     }
175357b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
175457b952d6SSatish Balay     mycols = ibuf;
175557b952d6SSatish Balay     /* receive message of column indices*/
175657b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
175757b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1758e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
175957b952d6SSatish Balay   }
176057b952d6SSatish Balay 
176157b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1762cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1763cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
176457b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1765cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
176657b952d6SSatish Balay   masked1 = mask    + Mbs;
176757b952d6SSatish Balay   masked2 = masked1 + Mbs;
176857b952d6SSatish Balay   rowcount = 0; nzcount = 0;
176957b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
177057b952d6SSatish Balay     dcount  = 0;
177157b952d6SSatish Balay     odcount = 0;
177257b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
177357b952d6SSatish Balay       kmax = locrowlens[rowcount];
177457b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
177557b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
177657b952d6SSatish Balay         if (!mask[tmp]) {
177757b952d6SSatish Balay           mask[tmp] = 1;
177857b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
177957b952d6SSatish Balay           else masked1[dcount++] = tmp;
178057b952d6SSatish Balay         }
178157b952d6SSatish Balay       }
178257b952d6SSatish Balay       rowcount++;
178357b952d6SSatish Balay     }
1784cee3aa6bSSatish Balay 
178557b952d6SSatish Balay     dlens[i]  = dcount;
178657b952d6SSatish Balay     odlens[i] = odcount;
1787cee3aa6bSSatish Balay 
178857b952d6SSatish Balay     /* zero out the mask elements we set */
178957b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
179057b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
179157b952d6SSatish Balay   }
1792cee3aa6bSSatish Balay 
179357b952d6SSatish Balay   /* create our matrix */
1794537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1795537820f0SBarry Smith          CHKERRQ(ierr);
179657b952d6SSatish Balay   A = *newmat;
17976d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
179857b952d6SSatish Balay 
179957b952d6SSatish Balay   if (!rank) {
180057b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
180157b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
180257b952d6SSatish Balay     nz = procsnz[0];
180357b952d6SSatish Balay     vals = buf;
1804cee3aa6bSSatish Balay     mycols = ibuf;
1805cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
180657b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1807cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1808537820f0SBarry Smith 
180957b952d6SSatish Balay     /* insert into matrix */
181057b952d6SSatish Balay     jj      = rstart*bs;
181157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
181257b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
181357b952d6SSatish Balay       mycols += locrowlens[i];
181457b952d6SSatish Balay       vals   += locrowlens[i];
181557b952d6SSatish Balay       jj++;
181657b952d6SSatish Balay     }
181757b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
181857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
181957b952d6SSatish Balay       nz = procsnz[i];
182057b952d6SSatish Balay       vals = buf;
182157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
182257b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
182357b952d6SSatish Balay     }
182457b952d6SSatish Balay     /* the last proc */
182557b952d6SSatish Balay     if ( size != 1 ){
182657b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1827cee3aa6bSSatish Balay       vals = buf;
182857b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
182957b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1830cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
183157b952d6SSatish Balay     }
183257b952d6SSatish Balay     PetscFree(procsnz);
183357b952d6SSatish Balay   }
183457b952d6SSatish Balay   else {
183557b952d6SSatish Balay     /* receive numeric values */
183657b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
183757b952d6SSatish Balay 
183857b952d6SSatish Balay     /* receive message of values*/
183957b952d6SSatish Balay     vals = buf;
1840cee3aa6bSSatish Balay     mycols = ibuf;
184157b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
184257b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1843e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
184457b952d6SSatish Balay 
184557b952d6SSatish Balay     /* insert into matrix */
184657b952d6SSatish Balay     jj      = rstart*bs;
1847cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
184857b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
184957b952d6SSatish Balay       mycols += locrowlens[i];
185057b952d6SSatish Balay       vals   += locrowlens[i];
185157b952d6SSatish Balay       jj++;
185257b952d6SSatish Balay     }
185357b952d6SSatish Balay   }
185457b952d6SSatish Balay   PetscFree(locrowlens);
185557b952d6SSatish Balay   PetscFree(buf);
185657b952d6SSatish Balay   PetscFree(ibuf);
185757b952d6SSatish Balay   PetscFree(rowners);
185857b952d6SSatish Balay   PetscFree(dlens);
1859cee3aa6bSSatish Balay   PetscFree(mask);
18606d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18616d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
186257b952d6SSatish Balay   return 0;
186357b952d6SSatish Balay }
186457b952d6SSatish Balay 
186557b952d6SSatish Balay 
1866