xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 30793edc096a79a776e222bc7117c70553e9bed7)
179bdfe76SSatish Balay #ifndef lint
2*30793edcSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.59 1997/03/26 01:36:18 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
53369ce9aSBarry Smith #include "pinclude/pviewer.h"
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
879bdfe76SSatish Balay 
957b952d6SSatish Balay 
1057b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1157b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
12d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1493bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1593bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1793bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1893bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2193bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2257b952d6SSatish Balay 
233b2fbd54SBarry Smith 
24537820f0SBarry Smith /*
25537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2857b952d6SSatish Balay    length of colmap equals the global matrix length.
2957b952d6SSatish Balay */
305615d1e5SSatish Balay #undef __FUNC__
315615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3357b952d6SSatish Balay {
3457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
36928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3757b952d6SSatish Balay 
3857b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4257b952d6SSatish Balay   return 0;
4357b952d6SSatish Balay }
4457b952d6SSatish Balay 
455615d1e5SSatish Balay #undef __FUNC__
465615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
473b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
483b2fbd54SBarry Smith                             PetscTruth *done)
4957b952d6SSatish Balay {
503b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5157b952d6SSatish Balay   int         ierr;
523b2fbd54SBarry Smith   if (aij->size == 1) {
533b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
54e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
553b2fbd54SBarry Smith   return 0;
563b2fbd54SBarry Smith }
573b2fbd54SBarry Smith 
585615d1e5SSatish Balay #undef __FUNC__
595615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
603b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
613b2fbd54SBarry Smith                                 PetscTruth *done)
623b2fbd54SBarry Smith {
633b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
643b2fbd54SBarry Smith   int        ierr;
653b2fbd54SBarry Smith   if (aij->size == 1) {
663b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
67e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6857b952d6SSatish Balay   return 0;
6957b952d6SSatish Balay }
7080c1aa95SSatish Balay #define CHUNKSIZE  10
7180c1aa95SSatish Balay 
72f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
7380c1aa95SSatish Balay { \
7480c1aa95SSatish Balay  \
7580c1aa95SSatish Balay     brow = row/bs;  \
7680c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
7780c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
7880c1aa95SSatish Balay       bcol = col/bs; \
7980c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
80ab26458aSBarry Smith       low = 0; high = nrow; \
81ab26458aSBarry Smith       while (high-low > 3) { \
82ab26458aSBarry Smith         t = (low+high)/2; \
83ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
84ab26458aSBarry Smith         else              low  = t; \
85ab26458aSBarry Smith       } \
86ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
8780c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8880c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8980c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
90eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
91eada6651SSatish Balay           else                    *bap  = value;  \
9280c1aa95SSatish Balay           goto _noinsert; \
9380c1aa95SSatish Balay         } \
9480c1aa95SSatish Balay       } \
950520107fSSatish Balay       if (a->nonew) goto _noinsert; \
9680c1aa95SSatish Balay       if (nrow >= rmax) { \
9780c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9880c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9980c1aa95SSatish Balay         Scalar *new_a; \
10080c1aa95SSatish Balay  \
10180c1aa95SSatish Balay         /* malloc new storage space */ \
10280c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
10380c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
10480c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10580c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10680c1aa95SSatish Balay  \
10780c1aa95SSatish Balay         /* copy over old data into new slots */ \
10880c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10980c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
11080c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
11180c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
11280c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
11380c1aa95SSatish Balay                                                            len*sizeof(int)); \
11480c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11580c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11680c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11780c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11880c1aa95SSatish Balay         /* free up old matrix storage */ \
11980c1aa95SSatish Balay         PetscFree(a->a);  \
12080c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
12180c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
12280c1aa95SSatish Balay         a->singlemalloc = 1; \
12380c1aa95SSatish Balay  \
12480c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
12580c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
12680c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12780c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12880c1aa95SSatish Balay         a->reallocs++; \
12980c1aa95SSatish Balay         a->nz++; \
13080c1aa95SSatish Balay       } \
13180c1aa95SSatish Balay       N = nrow++ - 1;  \
13280c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
13380c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
13480c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13580c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13680c1aa95SSatish Balay       } \
13780c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13880c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13980c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
14080c1aa95SSatish Balay       _noinsert:; \
14180c1aa95SSatish Balay     ailen[brow] = nrow; \
14280c1aa95SSatish Balay }
14357b952d6SSatish Balay 
144639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1455615d1e5SSatish Balay #undef __FUNC__
1465615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
147ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14857b952d6SSatish Balay {
14957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
15057b952d6SSatish Balay   Scalar      value;
1514fa0d573SSatish Balay   int         ierr,i,j,row,col;
1524fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1534fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1544fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
15557b952d6SSatish Balay 
156eada6651SSatish Balay   /* Some Variables required in the macro */
15780c1aa95SSatish Balay   Mat         A = baij->A;
15880c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
15980c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
16080c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
16180c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
162ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
16380c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
16480c1aa95SSatish Balay 
16557b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
166639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
167e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
168e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
169639f9d9dSBarry Smith #endif
17057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
17157b952d6SSatish Balay       row = im[i] - rstart_orig;
17257b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
17357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
17457b952d6SSatish Balay           col = in[j] - cstart_orig;
17557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
176f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
17780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
17857b952d6SSatish Balay         }
179639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
180e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
181e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
182639f9d9dSBarry Smith #endif
18357b952d6SSatish Balay         else {
18457b952d6SSatish Balay           if (mat->was_assembled) {
185905e6a2fSBarry Smith             if (!baij->colmap) {
186905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187905e6a2fSBarry Smith             }
188905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
18957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
19057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
19157b952d6SSatish Balay               col =  in[j];
19257b952d6SSatish Balay             }
19357b952d6SSatish Balay           }
19457b952d6SSatish Balay           else col = in[j];
19557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
196639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
19757b952d6SSatish Balay         }
19857b952d6SSatish Balay       }
19957b952d6SSatish Balay     }
20057b952d6SSatish Balay     else {
20190f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20257b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
20357b952d6SSatish Balay       }
20457b952d6SSatish Balay       else {
20590f02eecSBarry Smith         if (!baij->donotstash) {
20657b952d6SSatish Balay           row = im[i];
20757b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
20857b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
20957b952d6SSatish Balay           }
21057b952d6SSatish Balay         }
21157b952d6SSatish Balay       }
21257b952d6SSatish Balay     }
21390f02eecSBarry Smith   }
21457b952d6SSatish Balay   return 0;
21557b952d6SSatish Balay }
21657b952d6SSatish Balay 
217ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
218ab26458aSBarry Smith #undef __FUNC__
219ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
220ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
221ab26458aSBarry Smith {
222ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
223*30793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
224abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
225ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
226ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
227ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
228ab26458aSBarry Smith 
229*30793edcSSatish Balay 
230*30793edcSSatish Balay   if(!barray) {
231*30793edcSSatish Balay     barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
232*30793edcSSatish Balay     baij->barray = barray;
233*30793edcSSatish Balay   }
234*30793edcSSatish Balay 
235ab26458aSBarry Smith   if (roworiented) {
236ab26458aSBarry Smith     stepval = (n-1)*bs;
237ab26458aSBarry Smith   } else {
238ab26458aSBarry Smith     stepval = (m-1)*bs;
239ab26458aSBarry Smith   }
240ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
241ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
242ab26458aSBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
243ab26458aSBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(1,0,"Row too large");
244ab26458aSBarry Smith #endif
245ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
246ab26458aSBarry Smith       row = im[i] - rstart;
247ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
248ab26458aSBarry Smith         if (roworiented) {
249ab26458aSBarry Smith           value = v + i*(stepval+bs)*bs + j*bs;
250ab26458aSBarry Smith         } else {
251ab26458aSBarry Smith           value = v + j*(stepval+bs)*bs + i*bs;
252abef11f7SSatish Balay         }
253ab26458aSBarry Smith         for ( ii=0; ii<bs; ii++,value+=stepval )
254ab26458aSBarry Smith           for (jj=0; jj<bs; jj++ )
255*30793edcSSatish Balay             *barray++  = *value++;
256*30793edcSSatish Balay         barray -=bs2;
257abef11f7SSatish Balay 
258abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
259abef11f7SSatish Balay           col = in[j] - cstart;
260*30793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
261ab26458aSBarry Smith         }
262ab26458aSBarry Smith #if defined(PETSC_BOPT_g)
263ab26458aSBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
264ab26458aSBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(1,0,"Col too large");}
265ab26458aSBarry Smith #endif
266ab26458aSBarry Smith         else {
267ab26458aSBarry Smith           if (mat->was_assembled) {
268ab26458aSBarry Smith             if (!baij->colmap) {
269ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
270ab26458aSBarry Smith             }
271ab26458aSBarry Smith             col = baij->colmap[in[j]] - 1;
272ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
273ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
274ab26458aSBarry Smith               col =  in[j];
275ab26458aSBarry Smith             }
276ab26458aSBarry Smith           }
277ab26458aSBarry Smith           else col = in[j];
278*30793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
279ab26458aSBarry Smith         }
280ab26458aSBarry Smith       }
281ab26458aSBarry Smith     }
282ab26458aSBarry Smith     else {
283ab26458aSBarry Smith       if (!baij->donotstash) {
284ab26458aSBarry Smith         if (roworiented ) {
285abef11f7SSatish Balay           row   = im[i]*bs;
286abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
287abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
288abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
289abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
290abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
291abef11f7SSatish Balay               }
292ab26458aSBarry Smith             }
293ab26458aSBarry Smith           }
294ab26458aSBarry Smith         }
295ab26458aSBarry Smith         else {
296ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
297abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
298abef11f7SSatish Balay             col   = in[j]*bs;
299abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
300abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
301abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
302ab26458aSBarry Smith               }
303ab26458aSBarry Smith             }
304ab26458aSBarry Smith           }
305abef11f7SSatish Balay 
306abef11f7SSatish Balay         }
307abef11f7SSatish Balay       }
308ab26458aSBarry Smith     }
309ab26458aSBarry Smith   }
310ab26458aSBarry Smith   return 0;
311ab26458aSBarry Smith }
312ab26458aSBarry Smith 
3135615d1e5SSatish Balay #undef __FUNC__
3145615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
315ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
316d6de1c52SSatish Balay {
317d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
318d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
319d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
320d6de1c52SSatish Balay 
321d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
322e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
323e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
324d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
325d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
326d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
327e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
328e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
329d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
330d6de1c52SSatish Balay           col = idxn[j] - bscstart;
331d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
332d6de1c52SSatish Balay         }
333d6de1c52SSatish Balay         else {
334905e6a2fSBarry Smith           if (!baij->colmap) {
335905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336905e6a2fSBarry Smith           }
337e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
338dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
339d9d09a02SSatish Balay           else {
340dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
341d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
342d6de1c52SSatish Balay           }
343d6de1c52SSatish Balay         }
344d6de1c52SSatish Balay       }
345d9d09a02SSatish Balay     }
346d6de1c52SSatish Balay     else {
347e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
348d6de1c52SSatish Balay     }
349d6de1c52SSatish Balay   }
350d6de1c52SSatish Balay   return 0;
351d6de1c52SSatish Balay }
352d6de1c52SSatish Balay 
3535615d1e5SSatish Balay #undef __FUNC__
3545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
355ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
356d6de1c52SSatish Balay {
357d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
358d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
359acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
360d6de1c52SSatish Balay   double     sum = 0.0;
361d6de1c52SSatish Balay   Scalar     *v;
362d6de1c52SSatish Balay 
363d6de1c52SSatish Balay   if (baij->size == 1) {
364d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
365d6de1c52SSatish Balay   } else {
366d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
367d6de1c52SSatish Balay       v = amat->a;
368d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
369d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
370d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
371d6de1c52SSatish Balay #else
372d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
373d6de1c52SSatish Balay #endif
374d6de1c52SSatish Balay       }
375d6de1c52SSatish Balay       v = bmat->a;
376d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
377d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
378d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
379d6de1c52SSatish Balay #else
380d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
381d6de1c52SSatish Balay #endif
382d6de1c52SSatish Balay       }
383d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
384d6de1c52SSatish Balay       *norm = sqrt(*norm);
385d6de1c52SSatish Balay     }
386acdf5bf4SSatish Balay     else
387e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
388d6de1c52SSatish Balay   }
389d6de1c52SSatish Balay   return 0;
390d6de1c52SSatish Balay }
39157b952d6SSatish Balay 
3925615d1e5SSatish Balay #undef __FUNC__
3935615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
394ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
39557b952d6SSatish Balay {
39657b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
39757b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
39857b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
39957b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
40057b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
40157b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
40257b952d6SSatish Balay   InsertMode  addv;
40357b952d6SSatish Balay   Scalar      *rvalues,*svalues;
40457b952d6SSatish Balay 
40557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
406e0fa3b82SLois Curfman McInnes   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
40757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
408e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
40957b952d6SSatish Balay   }
410e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
41157b952d6SSatish Balay 
41257b952d6SSatish Balay   /*  first count number of contributors to each processor */
41357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
41457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
41557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
41657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
41757b952d6SSatish Balay     idx = baij->stash.idx[i];
41857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
41957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
42057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
42157b952d6SSatish Balay       }
42257b952d6SSatish Balay     }
42357b952d6SSatish Balay   }
42457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
42557b952d6SSatish Balay 
42657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
42757b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
42857b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
42957b952d6SSatish Balay   nreceives = work[rank];
43057b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
43157b952d6SSatish Balay   nmax = work[rank];
43257b952d6SSatish Balay   PetscFree(work);
43357b952d6SSatish Balay 
43457b952d6SSatish Balay   /* post receives:
43557b952d6SSatish Balay        1) each message will consist of ordered pairs
43657b952d6SSatish Balay      (global index,value) we store the global index as a double
43757b952d6SSatish Balay      to simplify the message passing.
43857b952d6SSatish Balay        2) since we don't know how long each individual message is we
43957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
44057b952d6SSatish Balay      this is a lot of wasted space.
44157b952d6SSatish Balay 
44257b952d6SSatish Balay 
44357b952d6SSatish Balay        This could be done better.
44457b952d6SSatish Balay   */
44557b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
44657b952d6SSatish Balay   CHKPTRQ(rvalues);
44757b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
44857b952d6SSatish Balay   CHKPTRQ(recv_waits);
44957b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
45057b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
45157b952d6SSatish Balay               comm,recv_waits+i);
45257b952d6SSatish Balay   }
45357b952d6SSatish Balay 
45457b952d6SSatish Balay   /* do sends:
45557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
45657b952d6SSatish Balay          the ith processor
45757b952d6SSatish Balay   */
45857b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
45957b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
46057b952d6SSatish Balay   CHKPTRQ(send_waits);
46157b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
46257b952d6SSatish Balay   starts[0] = 0;
46357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
46457b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
46557b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
46657b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
46757b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
46857b952d6SSatish Balay   }
46957b952d6SSatish Balay   PetscFree(owner);
47057b952d6SSatish Balay   starts[0] = 0;
47157b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
47257b952d6SSatish Balay   count = 0;
47357b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
47457b952d6SSatish Balay     if (procs[i]) {
47557b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
47657b952d6SSatish Balay                 comm,send_waits+count++);
47757b952d6SSatish Balay     }
47857b952d6SSatish Balay   }
47957b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
48057b952d6SSatish Balay 
48157b952d6SSatish Balay   /* Free cache space */
482d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
48357b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
48457b952d6SSatish Balay 
48557b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
48657b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
48757b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
48857b952d6SSatish Balay   baij->rmax       = nmax;
48957b952d6SSatish Balay 
49057b952d6SSatish Balay   return 0;
49157b952d6SSatish Balay }
49257b952d6SSatish Balay 
49357b952d6SSatish Balay 
4945615d1e5SSatish Balay #undef __FUNC__
4955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
496ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
49757b952d6SSatish Balay {
49857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
49957b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
50057b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
50157b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
50257b952d6SSatish Balay   Scalar      *values,val;
503e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
50457b952d6SSatish Balay 
50557b952d6SSatish Balay   /*  wait on receives */
50657b952d6SSatish Balay   while (count) {
50757b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
50857b952d6SSatish Balay     /* unpack receives into our local space */
50957b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
51057b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
51157b952d6SSatish Balay     n = n/3;
51257b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
51357b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
51457b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
51557b952d6SSatish Balay       val = values[3*i+2];
51657b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
51757b952d6SSatish Balay         col -= baij->cstart*bs;
51857b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
51957b952d6SSatish Balay       }
52057b952d6SSatish Balay       else {
52157b952d6SSatish Balay         if (mat->was_assembled) {
522905e6a2fSBarry Smith           if (!baij->colmap) {
523905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
524905e6a2fSBarry Smith           }
525905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
52657b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
52757b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
52857b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
52957b952d6SSatish Balay           }
53057b952d6SSatish Balay         }
53157b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
53257b952d6SSatish Balay       }
53357b952d6SSatish Balay     }
53457b952d6SSatish Balay     count--;
53557b952d6SSatish Balay   }
53657b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
53757b952d6SSatish Balay 
53857b952d6SSatish Balay   /* wait on sends */
53957b952d6SSatish Balay   if (baij->nsends) {
54057b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
54157b952d6SSatish Balay     CHKPTRQ(send_status);
54257b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
54357b952d6SSatish Balay     PetscFree(send_status);
54457b952d6SSatish Balay   }
54557b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
54657b952d6SSatish Balay 
54757b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
54857b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
54957b952d6SSatish Balay 
55057b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
55157b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
55257b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
55357b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
55457b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
55557b952d6SSatish Balay   }
55657b952d6SSatish Balay 
5576d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
55857b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
55957b952d6SSatish Balay   }
56057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
56157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
56257b952d6SSatish Balay 
56357b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
56457b952d6SSatish Balay   return 0;
56557b952d6SSatish Balay }
56657b952d6SSatish Balay 
5675615d1e5SSatish Balay #undef __FUNC__
5685615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
56957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
57057b952d6SSatish Balay {
57157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
57257b952d6SSatish Balay   int          ierr;
57357b952d6SSatish Balay 
57457b952d6SSatish Balay   if (baij->size == 1) {
57557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
57657b952d6SSatish Balay   }
577e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
57857b952d6SSatish Balay   return 0;
57957b952d6SSatish Balay }
58057b952d6SSatish Balay 
5815615d1e5SSatish Balay #undef __FUNC__
5825615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
58357b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
58457b952d6SSatish Balay {
58557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
586cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
58757b952d6SSatish Balay   FILE         *fd;
58857b952d6SSatish Balay   ViewerType   vtype;
58957b952d6SSatish Balay 
59057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
59157b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
59257b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
593639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5944e220ebcSLois Curfman McInnes       MatInfo info;
59557b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
59657b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
5974e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
59857b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
59957b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
6004e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
6014e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
6024e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
6034e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
6044e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
6054e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
60657b952d6SSatish Balay       fflush(fd);
60757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
60857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
60957b952d6SSatish Balay       return 0;
61057b952d6SSatish Balay     }
611639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
612bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
61357b952d6SSatish Balay       return 0;
61457b952d6SSatish Balay     }
61557b952d6SSatish Balay   }
61657b952d6SSatish Balay 
61757b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
61857b952d6SSatish Balay     Draw       draw;
61957b952d6SSatish Balay     PetscTruth isnull;
62057b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
62157b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
62257b952d6SSatish Balay   }
62357b952d6SSatish Balay 
62457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
62557b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
62657b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
62757b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
62857b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
62957b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
63057b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
63157b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
63257b952d6SSatish Balay     fflush(fd);
63357b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
63457b952d6SSatish Balay   }
63557b952d6SSatish Balay   else {
63657b952d6SSatish Balay     int size = baij->size;
63757b952d6SSatish Balay     rank = baij->rank;
63857b952d6SSatish Balay     if (size == 1) {
63957b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
64057b952d6SSatish Balay     }
64157b952d6SSatish Balay     else {
64257b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
64357b952d6SSatish Balay       Mat         A;
64457b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
64557b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
64657b952d6SSatish Balay       int         mbs=baij->mbs;
64757b952d6SSatish Balay       Scalar      *a;
64857b952d6SSatish Balay 
64957b952d6SSatish Balay       if (!rank) {
650cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
65157b952d6SSatish Balay         CHKERRQ(ierr);
65257b952d6SSatish Balay       }
65357b952d6SSatish Balay       else {
654cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
65557b952d6SSatish Balay         CHKERRQ(ierr);
65657b952d6SSatish Balay       }
65757b952d6SSatish Balay       PLogObjectParent(mat,A);
65857b952d6SSatish Balay 
65957b952d6SSatish Balay       /* copy over the A part */
66057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
66157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
66257b952d6SSatish Balay       row = baij->rstart;
66357b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
66457b952d6SSatish Balay 
66557b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
66657b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
66757b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
66857b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
66957b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
67057b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
671cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
672cee3aa6bSSatish Balay             col++; a += bs;
67357b952d6SSatish Balay           }
67457b952d6SSatish Balay         }
67557b952d6SSatish Balay       }
67657b952d6SSatish Balay       /* copy over the B part */
67757b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
67857b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
67957b952d6SSatish Balay       row = baij->rstart*bs;
68057b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
68157b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
68257b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
68357b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
68457b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
68557b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
686cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687cee3aa6bSSatish Balay             col++; a += bs;
68857b952d6SSatish Balay           }
68957b952d6SSatish Balay         }
69057b952d6SSatish Balay       }
69157b952d6SSatish Balay       PetscFree(rvals);
6926d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
6936d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
69457b952d6SSatish Balay       if (!rank) {
69557b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
69657b952d6SSatish Balay       }
69757b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
69857b952d6SSatish Balay     }
69957b952d6SSatish Balay   }
70057b952d6SSatish Balay   return 0;
70157b952d6SSatish Balay }
70257b952d6SSatish Balay 
70357b952d6SSatish Balay 
70457b952d6SSatish Balay 
7055615d1e5SSatish Balay #undef __FUNC__
7065615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
707ec1ea8d8SLois Curfman McInnes int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
70857b952d6SSatish Balay {
70957b952d6SSatish Balay   Mat         mat = (Mat) obj;
71057b952d6SSatish Balay   int         ierr;
71157b952d6SSatish Balay   ViewerType  vtype;
71257b952d6SSatish Balay 
71357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
71457b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
71557b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
71657b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
71757b952d6SSatish Balay   }
71857b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
71957b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
72057b952d6SSatish Balay   }
72157b952d6SSatish Balay   return 0;
72257b952d6SSatish Balay }
72357b952d6SSatish Balay 
7245615d1e5SSatish Balay #undef __FUNC__
7255615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
726ec1ea8d8SLois Curfman McInnes int MatDestroy_MPIBAIJ(PetscObject obj)
72779bdfe76SSatish Balay {
72879bdfe76SSatish Balay   Mat         mat = (Mat) obj;
72979bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
73079bdfe76SSatish Balay   int         ierr;
73179bdfe76SSatish Balay 
73279bdfe76SSatish Balay #if defined(PETSC_LOG)
73379bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
73479bdfe76SSatish Balay #endif
73579bdfe76SSatish Balay 
73679bdfe76SSatish Balay   PetscFree(baij->rowners);
73779bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
73879bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
73979bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
74079bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
74179bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
74279bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
74379bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
744*30793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
74579bdfe76SSatish Balay   PetscFree(baij);
74690f02eecSBarry Smith   if (mat->mapping) {
74790f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
74890f02eecSBarry Smith   }
74979bdfe76SSatish Balay   PLogObjectDestroy(mat);
75079bdfe76SSatish Balay   PetscHeaderDestroy(mat);
75179bdfe76SSatish Balay   return 0;
75279bdfe76SSatish Balay }
75379bdfe76SSatish Balay 
7545615d1e5SSatish Balay #undef __FUNC__
7555615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
756ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
757cee3aa6bSSatish Balay {
758cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
75947b4a8eaSLois Curfman McInnes   int         ierr, nt;
760cee3aa6bSSatish Balay 
761c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
76247b4a8eaSLois Curfman McInnes   if (nt != a->n) {
763ab26458aSBarry Smith     SETERRQ(1,0,"Incompatible partition of A and xx");
76447b4a8eaSLois Curfman McInnes   }
765c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
76647b4a8eaSLois Curfman McInnes   if (nt != a->m) {
767e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
76847b4a8eaSLois Curfman McInnes   }
76943a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
770cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
77143a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
772cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
77343a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
774cee3aa6bSSatish Balay   return 0;
775cee3aa6bSSatish Balay }
776cee3aa6bSSatish Balay 
7775615d1e5SSatish Balay #undef __FUNC__
7785615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
779ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
780cee3aa6bSSatish Balay {
781cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
782cee3aa6bSSatish Balay   int        ierr;
78343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
784cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
78543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
786cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
787cee3aa6bSSatish Balay   return 0;
788cee3aa6bSSatish Balay }
789cee3aa6bSSatish Balay 
7905615d1e5SSatish Balay #undef __FUNC__
7915615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
792ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
793cee3aa6bSSatish Balay {
794cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
795cee3aa6bSSatish Balay   int        ierr;
796cee3aa6bSSatish Balay 
797cee3aa6bSSatish Balay   /* do nondiagonal part */
798cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
799cee3aa6bSSatish Balay   /* send it on its way */
800537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
801cee3aa6bSSatish Balay   /* do local part */
802cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
803cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
804cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
805cee3aa6bSSatish Balay   /* but is not perhaps always true. */
806639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
807cee3aa6bSSatish Balay   return 0;
808cee3aa6bSSatish Balay }
809cee3aa6bSSatish Balay 
8105615d1e5SSatish Balay #undef __FUNC__
8115615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
812ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
813cee3aa6bSSatish Balay {
814cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
815cee3aa6bSSatish Balay   int        ierr;
816cee3aa6bSSatish Balay 
817cee3aa6bSSatish Balay   /* do nondiagonal part */
818cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
819cee3aa6bSSatish Balay   /* send it on its way */
820537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
821cee3aa6bSSatish Balay   /* do local part */
822cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
823cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
824cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
825cee3aa6bSSatish Balay   /* but is not perhaps always true. */
826537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
827cee3aa6bSSatish Balay   return 0;
828cee3aa6bSSatish Balay }
829cee3aa6bSSatish Balay 
830cee3aa6bSSatish Balay /*
831cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
832cee3aa6bSSatish Balay    diagonal block
833cee3aa6bSSatish Balay */
8345615d1e5SSatish Balay #undef __FUNC__
8355615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
836ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
837cee3aa6bSSatish Balay {
838cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
839cee3aa6bSSatish Balay   if (a->M != a->N)
840e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
841cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
842cee3aa6bSSatish Balay }
843cee3aa6bSSatish Balay 
8445615d1e5SSatish Balay #undef __FUNC__
8455615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
846ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
847cee3aa6bSSatish Balay {
848cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
849cee3aa6bSSatish Balay   int        ierr;
850cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
851cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
852cee3aa6bSSatish Balay   return 0;
853cee3aa6bSSatish Balay }
854026e39d0SSatish Balay 
8555615d1e5SSatish Balay #undef __FUNC__
8565615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
857ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
85857b952d6SSatish Balay {
85957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
86057b952d6SSatish Balay   *m = mat->M; *n = mat->N;
86157b952d6SSatish Balay   return 0;
86257b952d6SSatish Balay }
86357b952d6SSatish Balay 
8645615d1e5SSatish Balay #undef __FUNC__
8655615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
866ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
86757b952d6SSatish Balay {
86857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
86957b952d6SSatish Balay   *m = mat->m; *n = mat->N;
87057b952d6SSatish Balay   return 0;
87157b952d6SSatish Balay }
87257b952d6SSatish Balay 
8735615d1e5SSatish Balay #undef __FUNC__
8745615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
875ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
87657b952d6SSatish Balay {
87757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
87857b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
87957b952d6SSatish Balay   return 0;
88057b952d6SSatish Balay }
88157b952d6SSatish Balay 
882acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
883acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
884acdf5bf4SSatish Balay 
8855615d1e5SSatish Balay #undef __FUNC__
8865615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
887acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
888acdf5bf4SSatish Balay {
889acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
890acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
891acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
892d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
893d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
894acdf5bf4SSatish Balay 
895e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
896acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
897acdf5bf4SSatish Balay 
898acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
899acdf5bf4SSatish Balay     /*
900acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
901acdf5bf4SSatish Balay     */
902acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
903bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
904bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
905acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
906acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
907acdf5bf4SSatish Balay     }
908acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
909acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
910acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
911acdf5bf4SSatish Balay   }
912acdf5bf4SSatish Balay 
913acdf5bf4SSatish Balay 
914e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
915d9d09a02SSatish Balay   lrow = row - brstart;
916acdf5bf4SSatish Balay 
917acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
918acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
919acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
920acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
921acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
922acdf5bf4SSatish Balay   nztot = nzA + nzB;
923acdf5bf4SSatish Balay 
924acdf5bf4SSatish Balay   cmap  = mat->garray;
925acdf5bf4SSatish Balay   if (v  || idx) {
926acdf5bf4SSatish Balay     if (nztot) {
927acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
928acdf5bf4SSatish Balay       int imark = -1;
929acdf5bf4SSatish Balay       if (v) {
930acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
931acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
932d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
933acdf5bf4SSatish Balay           else break;
934acdf5bf4SSatish Balay         }
935acdf5bf4SSatish Balay         imark = i;
936acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
937acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
938acdf5bf4SSatish Balay       }
939acdf5bf4SSatish Balay       if (idx) {
940acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
941acdf5bf4SSatish Balay         if (imark > -1) {
942acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
943bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
944acdf5bf4SSatish Balay           }
945acdf5bf4SSatish Balay         } else {
946acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
947d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
948d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
949acdf5bf4SSatish Balay             else break;
950acdf5bf4SSatish Balay           }
951acdf5bf4SSatish Balay           imark = i;
952acdf5bf4SSatish Balay         }
953d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
954d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
955acdf5bf4SSatish Balay       }
956acdf5bf4SSatish Balay     }
957d212a18eSSatish Balay     else {
958d212a18eSSatish Balay       if (idx) *idx = 0;
959d212a18eSSatish Balay       if (v)   *v   = 0;
960d212a18eSSatish Balay     }
961acdf5bf4SSatish Balay   }
962acdf5bf4SSatish Balay   *nz = nztot;
963acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
964acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
965acdf5bf4SSatish Balay   return 0;
966acdf5bf4SSatish Balay }
967acdf5bf4SSatish Balay 
9685615d1e5SSatish Balay #undef __FUNC__
9695615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
970acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
971acdf5bf4SSatish Balay {
972acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
973acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
974e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
975acdf5bf4SSatish Balay   }
976acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
977acdf5bf4SSatish Balay   return 0;
978acdf5bf4SSatish Balay }
979acdf5bf4SSatish Balay 
9805615d1e5SSatish Balay #undef __FUNC__
9815615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
982ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
9835a838052SSatish Balay {
9845a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
9855a838052SSatish Balay   *bs = baij->bs;
9865a838052SSatish Balay   return 0;
9875a838052SSatish Balay }
9885a838052SSatish Balay 
9895615d1e5SSatish Balay #undef __FUNC__
9905615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
991ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
99258667388SSatish Balay {
99358667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
99458667388SSatish Balay   int         ierr;
99558667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
99658667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
99758667388SSatish Balay   return 0;
99858667388SSatish Balay }
9990ac07820SSatish Balay 
10005615d1e5SSatish Balay #undef __FUNC__
10015615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1002ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
10030ac07820SSatish Balay {
10044e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
10054e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
10067d57db60SLois Curfman McInnes   int         ierr;
10077d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
10080ac07820SSatish Balay 
10094e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
10104e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
10114e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
10124e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
10134e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
10144e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
10154e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
10164e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
10174e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
10180ac07820SSatish Balay   if (flag == MAT_LOCAL) {
10194e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
10204e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
10214e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
10224e220ebcSLois Curfman McInnes     info->memory       = isend[3];
10234e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
10240ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1025dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);
10264e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10274e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10284e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10294e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10304e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10310ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1032dd2c0978SLois Curfman McInnes     MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);
10334e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
10344e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
10354e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
10364e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
10374e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
10380ac07820SSatish Balay   }
10394e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
10404e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
10414e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
10420ac07820SSatish Balay   return 0;
10430ac07820SSatish Balay }
10440ac07820SSatish Balay 
10455615d1e5SSatish Balay #undef __FUNC__
10465615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1047ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
104858667388SSatish Balay {
104958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
105058667388SSatish Balay 
105158667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
105258667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
10536da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1054c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1055c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1056b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1057b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1058b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1059aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
106058667388SSatish Balay         MatSetOption(a->A,op);
106158667388SSatish Balay         MatSetOption(a->B,op);
1062b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
10636da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
106458667388SSatish Balay              op == MAT_SYMMETRIC ||
106558667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
106658667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
106758667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
106858667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
106958667388SSatish Balay     a->roworiented = 0;
107058667388SSatish Balay     MatSetOption(a->A,op);
107158667388SSatish Balay     MatSetOption(a->B,op);
107290f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
107390f02eecSBarry Smith     a->donotstash = 1;
107490f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
1075e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
107658667388SSatish Balay   else
1077e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
107858667388SSatish Balay   return 0;
107958667388SSatish Balay }
108058667388SSatish Balay 
10815615d1e5SSatish Balay #undef __FUNC__
10825615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1083ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
10840ac07820SSatish Balay {
10850ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
10860ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
10870ac07820SSatish Balay   Mat        B;
10880ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
10890ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
10900ac07820SSatish Balay   Scalar     *a;
10910ac07820SSatish Balay 
10920ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
1093e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
10940ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
10950ac07820SSatish Balay   CHKERRQ(ierr);
10960ac07820SSatish Balay 
10970ac07820SSatish Balay   /* copy over the A part */
10980ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
10990ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11000ac07820SSatish Balay   row = baij->rstart;
11010ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
11020ac07820SSatish Balay 
11030ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11040ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11050ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11060ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11070ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
11080ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11090ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11100ac07820SSatish Balay         col++; a += bs;
11110ac07820SSatish Balay       }
11120ac07820SSatish Balay     }
11130ac07820SSatish Balay   }
11140ac07820SSatish Balay   /* copy over the B part */
11150ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
11160ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
11170ac07820SSatish Balay   row = baij->rstart*bs;
11180ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
11190ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
11200ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
11210ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
11220ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
11230ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
11240ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
11250ac07820SSatish Balay         col++; a += bs;
11260ac07820SSatish Balay       }
11270ac07820SSatish Balay     }
11280ac07820SSatish Balay   }
11290ac07820SSatish Balay   PetscFree(rvals);
11300ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11310ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11320ac07820SSatish Balay 
11330ac07820SSatish Balay   if (matout != PETSC_NULL) {
11340ac07820SSatish Balay     *matout = B;
11350ac07820SSatish Balay   } else {
11360ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
11370ac07820SSatish Balay     PetscFree(baij->rowners);
11380ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
11390ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
11400ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
11410ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
11420ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
11430ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
11440ac07820SSatish Balay     PetscFree(baij);
11450ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
11460ac07820SSatish Balay     PetscHeaderDestroy(B);
11470ac07820SSatish Balay   }
11480ac07820SSatish Balay   return 0;
11490ac07820SSatish Balay }
11500e95ebc0SSatish Balay 
11515615d1e5SSatish Balay #undef __FUNC__
11525615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
11530e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
11540e95ebc0SSatish Balay {
11550e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
11560e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
11570e95ebc0SSatish Balay   int ierr,s1,s2,s3;
11580e95ebc0SSatish Balay 
11590e95ebc0SSatish Balay   if (ll)  {
11600e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
11610e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1162e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
11630e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
11640e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
11650e95ebc0SSatish Balay   }
1166e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
11670e95ebc0SSatish Balay   return 0;
11680e95ebc0SSatish Balay }
11690e95ebc0SSatish Balay 
11700ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
11710ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
11720ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
11730ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
11740ac07820SSatish Balay    routine.
11750ac07820SSatish Balay */
11765615d1e5SSatish Balay #undef __FUNC__
11775615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1178ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
11790ac07820SSatish Balay {
11800ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
11810ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
11820ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
11830ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
11840ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
11850ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
11860ac07820SSatish Balay   MPI_Comm       comm = A->comm;
11870ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
11880ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
11890ac07820SSatish Balay   IS             istmp;
11900ac07820SSatish Balay 
11910ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
11920ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
11930ac07820SSatish Balay 
11940ac07820SSatish Balay   /*  first count number of contributors to each processor */
11950ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
11960ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
11970ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
11980ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11990ac07820SSatish Balay     idx = rows[i];
12000ac07820SSatish Balay     found = 0;
12010ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
12020ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
12030ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
12040ac07820SSatish Balay       }
12050ac07820SSatish Balay     }
1206e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
12070ac07820SSatish Balay   }
12080ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
12090ac07820SSatish Balay 
12100ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
12110ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
12120ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
12130ac07820SSatish Balay   nrecvs = work[rank];
12140ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
12150ac07820SSatish Balay   nmax = work[rank];
12160ac07820SSatish Balay   PetscFree(work);
12170ac07820SSatish Balay 
12180ac07820SSatish Balay   /* post receives:   */
12190ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
12200ac07820SSatish Balay   CHKPTRQ(rvalues);
12210ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
12220ac07820SSatish Balay   CHKPTRQ(recv_waits);
12230ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12240ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
12250ac07820SSatish Balay   }
12260ac07820SSatish Balay 
12270ac07820SSatish Balay   /* do sends:
12280ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
12290ac07820SSatish Balay          the ith processor
12300ac07820SSatish Balay   */
12310ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
12320ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
12330ac07820SSatish Balay   CHKPTRQ(send_waits);
12340ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
12350ac07820SSatish Balay   starts[0] = 0;
12360ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12370ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
12380ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
12390ac07820SSatish Balay   }
12400ac07820SSatish Balay   ISRestoreIndices(is,&rows);
12410ac07820SSatish Balay 
12420ac07820SSatish Balay   starts[0] = 0;
12430ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
12440ac07820SSatish Balay   count = 0;
12450ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
12460ac07820SSatish Balay     if (procs[i]) {
12470ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
12480ac07820SSatish Balay     }
12490ac07820SSatish Balay   }
12500ac07820SSatish Balay   PetscFree(starts);
12510ac07820SSatish Balay 
12520ac07820SSatish Balay   base = owners[rank]*bs;
12530ac07820SSatish Balay 
12540ac07820SSatish Balay   /*  wait on receives */
12550ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
12560ac07820SSatish Balay   source = lens + nrecvs;
12570ac07820SSatish Balay   count  = nrecvs; slen = 0;
12580ac07820SSatish Balay   while (count) {
12590ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
12600ac07820SSatish Balay     /* unpack receives into our local space */
12610ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
12620ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
12630ac07820SSatish Balay     lens[imdex]  = n;
12640ac07820SSatish Balay     slen += n;
12650ac07820SSatish Balay     count--;
12660ac07820SSatish Balay   }
12670ac07820SSatish Balay   PetscFree(recv_waits);
12680ac07820SSatish Balay 
12690ac07820SSatish Balay   /* move the data into the send scatter */
12700ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
12710ac07820SSatish Balay   count = 0;
12720ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
12730ac07820SSatish Balay     values = rvalues + i*nmax;
12740ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
12750ac07820SSatish Balay       lrows[count++] = values[j] - base;
12760ac07820SSatish Balay     }
12770ac07820SSatish Balay   }
12780ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
12790ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
12800ac07820SSatish Balay 
12810ac07820SSatish Balay   /* actually zap the local rows */
1282537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
12830ac07820SSatish Balay   PLogObjectParent(A,istmp);
12840ac07820SSatish Balay   PetscFree(lrows);
12850ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
12860ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
12870ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
12880ac07820SSatish Balay 
12890ac07820SSatish Balay   /* wait on sends */
12900ac07820SSatish Balay   if (nsends) {
12910ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
12920ac07820SSatish Balay     CHKPTRQ(send_status);
12930ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
12940ac07820SSatish Balay     PetscFree(send_status);
12950ac07820SSatish Balay   }
12960ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
12970ac07820SSatish Balay 
12980ac07820SSatish Balay   return 0;
12990ac07820SSatish Balay }
1300ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
13015615d1e5SSatish Balay #undef __FUNC__
13025615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1303ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1304ba4ca20aSSatish Balay {
1305ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1306ba4ca20aSSatish Balay 
1307ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1308ba4ca20aSSatish Balay   else return 0;
1309ba4ca20aSSatish Balay }
13100ac07820SSatish Balay 
13115615d1e5SSatish Balay #undef __FUNC__
13125615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1313ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1314bb5a7306SBarry Smith {
1315bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1316bb5a7306SBarry Smith   int         ierr;
1317bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1318bb5a7306SBarry Smith   return 0;
1319bb5a7306SBarry Smith }
1320bb5a7306SBarry Smith 
13210ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
13220ac07820SSatish Balay 
132379bdfe76SSatish Balay /* -------------------------------------------------------------------*/
132479bdfe76SSatish Balay static struct _MatOps MatOps = {
1325bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
13264c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
13274c50302cSBarry Smith   0,0,0,0,
13280ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
13290e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
133058667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
13314c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
13324c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
13334c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
133494a9d846SBarry Smith   0,0,MatConvertSameType_MPIBAIJ,0,0,
1335d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1336ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1337bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1338ab26458aSBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
133979bdfe76SSatish Balay 
134079bdfe76SSatish Balay 
13415615d1e5SSatish Balay #undef __FUNC__
13425615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
134379bdfe76SSatish Balay /*@C
134479bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
134579bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
134679bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
134779bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
134879bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
134979bdfe76SSatish Balay 
135079bdfe76SSatish Balay    Input Parameters:
135179bdfe76SSatish Balay .  comm - MPI communicator
135279bdfe76SSatish Balay .  bs   - size of blockk
135379bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
135492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
135592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
135692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
135792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
135892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
135979bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
136092e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
136179bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
136279bdfe76SSatish Balay            submatrix  (same for all local rows)
136392e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
136492e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
136592e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
136692e8d321SLois Curfman McInnes            it is zero.
136792e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
136879bdfe76SSatish Balay            submatrix (same for all local rows).
136992e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
137092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
137192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
137279bdfe76SSatish Balay 
137379bdfe76SSatish Balay    Output Parameter:
137479bdfe76SSatish Balay .  A - the matrix
137579bdfe76SSatish Balay 
137679bdfe76SSatish Balay    Notes:
137779bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
137879bdfe76SSatish Balay    (possibly both).
137979bdfe76SSatish Balay 
138079bdfe76SSatish Balay    Storage Information:
138179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
138279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
138379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
138479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
138579bdfe76SSatish Balay 
138679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
138779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
138879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
138979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
139079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
139179bdfe76SSatish Balay 
139279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
139379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
139479bdfe76SSatish Balay 
139579bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
139679bdfe76SSatish Balay $         -------------------
139779bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
139879bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
139979bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
140079bdfe76SSatish Balay $         -------------------
140179bdfe76SSatish Balay $
140279bdfe76SSatish Balay 
140379bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
140479bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
140579bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
140657b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
140779bdfe76SSatish Balay 
140879bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
140979bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
141079bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
141192e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
141292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
14136da5968aSLois Curfman McInnes    matrices.
141479bdfe76SSatish Balay 
141592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
141679bdfe76SSatish Balay 
141779bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
141879bdfe76SSatish Balay @*/
141979bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
142079bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
142179bdfe76SSatish Balay {
142279bdfe76SSatish Balay   Mat          B;
142379bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
14244c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
142579bdfe76SSatish Balay 
1426e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
142779bdfe76SSatish Balay   *A = 0;
142879bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
142979bdfe76SSatish Balay   PLogObjectCreate(B);
143079bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
143179bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
143279bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
14334c50302cSBarry Smith   MPI_Comm_size(comm,&size);
14344c50302cSBarry Smith   if (size == 1) {
14354c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
14364c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
14374c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
14384c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
14394c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
14404c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
14414c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
14424c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
14434c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
14444c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
14454c50302cSBarry Smith   }
14464c50302cSBarry Smith 
144779bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
144879bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
144990f02eecSBarry Smith   B->mapping    = 0;
145079bdfe76SSatish Balay   B->factor     = 0;
145179bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
145279bdfe76SSatish Balay 
1453e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
145479bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
145579bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
145679bdfe76SSatish Balay 
145779bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1458e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1459e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1460e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1461cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1462cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
146379bdfe76SSatish Balay 
146479bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
146579bdfe76SSatish Balay     work[0] = m; work[1] = n;
146679bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
146779bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
146879bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
146979bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
147079bdfe76SSatish Balay   }
147179bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
147279bdfe76SSatish Balay     Mbs = M/bs;
1473e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
147479bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
147579bdfe76SSatish Balay     m   = mbs*bs;
147679bdfe76SSatish Balay   }
147779bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
147879bdfe76SSatish Balay     Nbs = N/bs;
1479e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
148079bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
148179bdfe76SSatish Balay     n   = nbs*bs;
148279bdfe76SSatish Balay   }
1483e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
148479bdfe76SSatish Balay 
148579bdfe76SSatish Balay   b->m = m; B->m = m;
148679bdfe76SSatish Balay   b->n = n; B->n = n;
148779bdfe76SSatish Balay   b->N = N; B->N = N;
148879bdfe76SSatish Balay   b->M = M; B->M = M;
148979bdfe76SSatish Balay   b->bs  = bs;
149079bdfe76SSatish Balay   b->bs2 = bs*bs;
149179bdfe76SSatish Balay   b->mbs = mbs;
149279bdfe76SSatish Balay   b->nbs = nbs;
149379bdfe76SSatish Balay   b->Mbs = Mbs;
149479bdfe76SSatish Balay   b->Nbs = Nbs;
149579bdfe76SSatish Balay 
149679bdfe76SSatish Balay   /* build local table of row and column ownerships */
149779bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
149879bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14990ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
150079bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
150179bdfe76SSatish Balay   b->rowners[0] = 0;
150279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
150379bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
150479bdfe76SSatish Balay   }
150579bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
150679bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
15074fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
15084fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
15094fa0d573SSatish Balay 
151079bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
151179bdfe76SSatish Balay   b->cowners[0] = 0;
151279bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
151379bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
151479bdfe76SSatish Balay   }
151579bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
151679bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
15174fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
15184fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
151979bdfe76SSatish Balay 
152079bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
152179bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
152279bdfe76SSatish Balay   PLogObjectParent(B,b->A);
152379bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
152479bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
152579bdfe76SSatish Balay   PLogObjectParent(B,b->B);
152679bdfe76SSatish Balay 
152779bdfe76SSatish Balay   /* build cache for off array entries formed */
152879bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
152990f02eecSBarry Smith   b->donotstash  = 0;
153079bdfe76SSatish Balay   b->colmap      = 0;
153179bdfe76SSatish Balay   b->garray      = 0;
153279bdfe76SSatish Balay   b->roworiented = 1;
153379bdfe76SSatish Balay 
1534*30793edcSSatish Balay   /* stuff used in block assembly */
1535*30793edcSSatish Balay   b->barray      = 0;
1536*30793edcSSatish Balay 
153779bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
153879bdfe76SSatish Balay   b->lvec        = 0;
153979bdfe76SSatish Balay   b->Mvctx       = 0;
154079bdfe76SSatish Balay 
154179bdfe76SSatish Balay   /* stuff for MatGetRow() */
154279bdfe76SSatish Balay   b->rowindices   = 0;
154379bdfe76SSatish Balay   b->rowvalues    = 0;
154479bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
154579bdfe76SSatish Balay 
154679bdfe76SSatish Balay   *A = B;
154779bdfe76SSatish Balay   return 0;
154879bdfe76SSatish Balay }
1549026e39d0SSatish Balay 
15505615d1e5SSatish Balay #undef __FUNC__
15515615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
15520ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
15530ac07820SSatish Balay {
15540ac07820SSatish Balay   Mat         mat;
15550ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
15560ac07820SSatish Balay   int         ierr, len=0, flg;
15570ac07820SSatish Balay 
15580ac07820SSatish Balay   *newmat       = 0;
15590ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
15600ac07820SSatish Balay   PLogObjectCreate(mat);
15610ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
15620ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
15630ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
15640ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
15650ac07820SSatish Balay   mat->factor     = matin->factor;
15660ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
15670ac07820SSatish Balay 
15680ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
15690ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
15700ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
15710ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
15720ac07820SSatish Balay 
15730ac07820SSatish Balay   a->bs  = oldmat->bs;
15740ac07820SSatish Balay   a->bs2 = oldmat->bs2;
15750ac07820SSatish Balay   a->mbs = oldmat->mbs;
15760ac07820SSatish Balay   a->nbs = oldmat->nbs;
15770ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
15780ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
15790ac07820SSatish Balay 
15800ac07820SSatish Balay   a->rstart       = oldmat->rstart;
15810ac07820SSatish Balay   a->rend         = oldmat->rend;
15820ac07820SSatish Balay   a->cstart       = oldmat->cstart;
15830ac07820SSatish Balay   a->cend         = oldmat->cend;
15840ac07820SSatish Balay   a->size         = oldmat->size;
15850ac07820SSatish Balay   a->rank         = oldmat->rank;
1586e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
15870ac07820SSatish Balay   a->rowvalues    = 0;
15880ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
1589*30793edcSSatish Balay   a->barray       = 0;
15900ac07820SSatish Balay 
15910ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
15920ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
15930ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
15940ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
15950ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
15960ac07820SSatish Balay   if (oldmat->colmap) {
15970ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
15980ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
15990ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
16000ac07820SSatish Balay   } else a->colmap = 0;
16010ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
16020ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
16030ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
16040ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
16050ac07820SSatish Balay   } else a->garray = 0;
16060ac07820SSatish Balay 
16070ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
16080ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
16090ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
16100ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
16110ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
16120ac07820SSatish Balay   PLogObjectParent(mat,a->A);
16130ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
16140ac07820SSatish Balay   PLogObjectParent(mat,a->B);
16150ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
16160ac07820SSatish Balay   if (flg) {
16170ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
16180ac07820SSatish Balay   }
16190ac07820SSatish Balay   *newmat = mat;
16200ac07820SSatish Balay   return 0;
16210ac07820SSatish Balay }
162257b952d6SSatish Balay 
162357b952d6SSatish Balay #include "sys.h"
162457b952d6SSatish Balay 
16255615d1e5SSatish Balay #undef __FUNC__
16265615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
162757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
162857b952d6SSatish Balay {
162957b952d6SSatish Balay   Mat          A;
163057b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
163157b952d6SSatish Balay   Scalar       *vals,*buf;
163257b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
163357b952d6SSatish Balay   MPI_Status   status;
1634cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
163557b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
163657b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
163757b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
163857b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
163957b952d6SSatish Balay 
164057b952d6SSatish Balay 
164157b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
164257b952d6SSatish Balay   bs2  = bs*bs;
164357b952d6SSatish Balay 
164457b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
164557b952d6SSatish Balay   if (!rank) {
164657b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
164757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1648e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
164957b952d6SSatish Balay   }
165057b952d6SSatish Balay 
165157b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
165257b952d6SSatish Balay   M = header[1]; N = header[2];
165357b952d6SSatish Balay 
1654e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
165557b952d6SSatish Balay 
165657b952d6SSatish Balay   /*
165757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
165857b952d6SSatish Balay      divisible by the blocksize
165957b952d6SSatish Balay   */
166057b952d6SSatish Balay   Mbs        = M/bs;
166157b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
166257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
166357b952d6SSatish Balay   else                  Mbs++;
166457b952d6SSatish Balay   if (extra_rows &&!rank) {
1665b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
166657b952d6SSatish Balay   }
1667537820f0SBarry Smith 
166857b952d6SSatish Balay   /* determine ownership of all rows */
166957b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
167057b952d6SSatish Balay   m   = mbs * bs;
1671cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1672cee3aa6bSSatish Balay   browners = rowners + size + 1;
167357b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
167457b952d6SSatish Balay   rowners[0] = 0;
1675cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1676cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
167757b952d6SSatish Balay   rstart = rowners[rank];
167857b952d6SSatish Balay   rend   = rowners[rank+1];
167957b952d6SSatish Balay 
168057b952d6SSatish Balay   /* distribute row lengths to all processors */
168157b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
168257b952d6SSatish Balay   if (!rank) {
168357b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
168457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
168557b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
168657b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1687cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1688cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
168957b952d6SSatish Balay     PetscFree(sndcounts);
169057b952d6SSatish Balay   }
169157b952d6SSatish Balay   else {
169257b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
169357b952d6SSatish Balay   }
169457b952d6SSatish Balay 
169557b952d6SSatish Balay   if (!rank) {
169657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
169757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
169857b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
169957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
170057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
170157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
170257b952d6SSatish Balay       }
170357b952d6SSatish Balay     }
170457b952d6SSatish Balay     PetscFree(rowlengths);
170557b952d6SSatish Balay 
170657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
170757b952d6SSatish Balay     maxnz = 0;
170857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
170957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
171057b952d6SSatish Balay     }
171157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
171257b952d6SSatish Balay 
171357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
171457b952d6SSatish Balay     nz = procsnz[0];
171557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
171657b952d6SSatish Balay     mycols = ibuf;
1717cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
171857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1719cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1720cee3aa6bSSatish Balay 
172157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
172257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
172357b952d6SSatish Balay       nz = procsnz[i];
172457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
172557b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
172657b952d6SSatish Balay     }
172757b952d6SSatish Balay     /* read in the stuff for the last proc */
172857b952d6SSatish Balay     if ( size != 1 ) {
172957b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
173057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
173157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1732cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
173357b952d6SSatish Balay     }
173457b952d6SSatish Balay     PetscFree(cols);
173557b952d6SSatish Balay   }
173657b952d6SSatish Balay   else {
173757b952d6SSatish Balay     /* determine buffer space needed for message */
173857b952d6SSatish Balay     nz = 0;
173957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
174057b952d6SSatish Balay       nz += locrowlens[i];
174157b952d6SSatish Balay     }
174257b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
174357b952d6SSatish Balay     mycols = ibuf;
174457b952d6SSatish Balay     /* receive message of column indices*/
174557b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
174657b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1747e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
174857b952d6SSatish Balay   }
174957b952d6SSatish Balay 
175057b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1751cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1752cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
175357b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1754cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
175557b952d6SSatish Balay   masked1 = mask    + Mbs;
175657b952d6SSatish Balay   masked2 = masked1 + Mbs;
175757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
175857b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
175957b952d6SSatish Balay     dcount  = 0;
176057b952d6SSatish Balay     odcount = 0;
176157b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
176257b952d6SSatish Balay       kmax = locrowlens[rowcount];
176357b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
176457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
176557b952d6SSatish Balay         if (!mask[tmp]) {
176657b952d6SSatish Balay           mask[tmp] = 1;
176757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
176857b952d6SSatish Balay           else masked1[dcount++] = tmp;
176957b952d6SSatish Balay         }
177057b952d6SSatish Balay       }
177157b952d6SSatish Balay       rowcount++;
177257b952d6SSatish Balay     }
1773cee3aa6bSSatish Balay 
177457b952d6SSatish Balay     dlens[i]  = dcount;
177557b952d6SSatish Balay     odlens[i] = odcount;
1776cee3aa6bSSatish Balay 
177757b952d6SSatish Balay     /* zero out the mask elements we set */
177857b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
177957b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
178057b952d6SSatish Balay   }
1781cee3aa6bSSatish Balay 
178257b952d6SSatish Balay   /* create our matrix */
1783537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1784537820f0SBarry Smith          CHKERRQ(ierr);
178557b952d6SSatish Balay   A = *newmat;
17866d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
178757b952d6SSatish Balay 
178857b952d6SSatish Balay   if (!rank) {
178957b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
179057b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
179157b952d6SSatish Balay     nz = procsnz[0];
179257b952d6SSatish Balay     vals = buf;
1793cee3aa6bSSatish Balay     mycols = ibuf;
1794cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
179557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1796cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1797537820f0SBarry Smith 
179857b952d6SSatish Balay     /* insert into matrix */
179957b952d6SSatish Balay     jj      = rstart*bs;
180057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
180157b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
180257b952d6SSatish Balay       mycols += locrowlens[i];
180357b952d6SSatish Balay       vals   += locrowlens[i];
180457b952d6SSatish Balay       jj++;
180557b952d6SSatish Balay     }
180657b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
180757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
180857b952d6SSatish Balay       nz = procsnz[i];
180957b952d6SSatish Balay       vals = buf;
181057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
181157b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
181257b952d6SSatish Balay     }
181357b952d6SSatish Balay     /* the last proc */
181457b952d6SSatish Balay     if ( size != 1 ){
181557b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1816cee3aa6bSSatish Balay       vals = buf;
181757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
181857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1819cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
182057b952d6SSatish Balay     }
182157b952d6SSatish Balay     PetscFree(procsnz);
182257b952d6SSatish Balay   }
182357b952d6SSatish Balay   else {
182457b952d6SSatish Balay     /* receive numeric values */
182557b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
182657b952d6SSatish Balay 
182757b952d6SSatish Balay     /* receive message of values*/
182857b952d6SSatish Balay     vals = buf;
1829cee3aa6bSSatish Balay     mycols = ibuf;
183057b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
183157b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1832e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
183357b952d6SSatish Balay 
183457b952d6SSatish Balay     /* insert into matrix */
183557b952d6SSatish Balay     jj      = rstart*bs;
1836cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
183757b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
183857b952d6SSatish Balay       mycols += locrowlens[i];
183957b952d6SSatish Balay       vals   += locrowlens[i];
184057b952d6SSatish Balay       jj++;
184157b952d6SSatish Balay     }
184257b952d6SSatish Balay   }
184357b952d6SSatish Balay   PetscFree(locrowlens);
184457b952d6SSatish Balay   PetscFree(buf);
184557b952d6SSatish Balay   PetscFree(ibuf);
184657b952d6SSatish Balay   PetscFree(rowners);
184757b952d6SSatish Balay   PetscFree(dlens);
1848cee3aa6bSSatish Balay   PetscFree(mask);
18496d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
18506d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
185157b952d6SSatish Balay   return 0;
185257b952d6SSatish Balay }
185357b952d6SSatish Balay 
185457b952d6SSatish Balay 
1855