xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 80c1aa95bd12b9d562ae9106f3faf74872834bb5)
179bdfe76SSatish Balay #ifndef lint
2*80c1aa95SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.46 1997/01/06 20:25:27 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
595615d1e5SSatish Balay #undef __FUNC__
605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
71*80c1aa95SSatish Balay #define CHUNKSIZE  10
72*80c1aa95SSatish Balay 
73*80c1aa95SSatish Balay #define  MatSetValues_SeqBAIJ_A_Private( new_A, row, col, value) \
74*80c1aa95SSatish Balay { \
75*80c1aa95SSatish Balay  \
76*80c1aa95SSatish Balay     brow = row/bs;  \
77*80c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
78*80c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
79*80c1aa95SSatish Balay       bcol = col/bs; \
80*80c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
81*80c1aa95SSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
82*80c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
83*80c1aa95SSatish Balay         if (rp[_i] == bcol) { \
84*80c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
85*80c1aa95SSatish Balay 	    *bap  = value; \
86*80c1aa95SSatish Balay           goto _noinsert; \
87*80c1aa95SSatish Balay         } \
88*80c1aa95SSatish Balay       } \
89*80c1aa95SSatish Balay       if (nrow >= rmax) { \
90*80c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
91*80c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
92*80c1aa95SSatish Balay         Scalar *new_a; \
93*80c1aa95SSatish Balay  \
94*80c1aa95SSatish Balay         /* malloc new storage space */ \
95*80c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
96*80c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
97*80c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
98*80c1aa95SSatish Balay         new_i   = new_j + new_nz; \
99*80c1aa95SSatish Balay  \
100*80c1aa95SSatish Balay         /* copy over old data into new slots */ \
101*80c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
102*80c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
103*80c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
104*80c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
105*80c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
106*80c1aa95SSatish Balay                                                            len*sizeof(int)); \
107*80c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
108*80c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
109*80c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
110*80c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
111*80c1aa95SSatish Balay         /* free up old matrix storage */ \
112*80c1aa95SSatish Balay         PetscFree(a->a);  \
113*80c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
114*80c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
115*80c1aa95SSatish Balay         a->singlemalloc = 1; \
116*80c1aa95SSatish Balay  \
117*80c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
118*80c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
119*80c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
120*80c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
121*80c1aa95SSatish Balay         a->reallocs++; \
122*80c1aa95SSatish Balay         a->nz++; \
123*80c1aa95SSatish Balay       } \
124*80c1aa95SSatish Balay       N = nrow++ - 1;  \
125*80c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
126*80c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
127*80c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
128*80c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
129*80c1aa95SSatish Balay       } \
130*80c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
131*80c1aa95SSatish Balay       rp[_i]                      = bcol;  \
132*80c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
133*80c1aa95SSatish Balay       _noinsert:; \
134*80c1aa95SSatish Balay     ailen[brow] = nrow; \
135*80c1aa95SSatish Balay }
13657b952d6SSatish Balay 
137639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1385615d1e5SSatish Balay #undef __FUNC__
1395615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
14057b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14157b952d6SSatish Balay {
14257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
14357b952d6SSatish Balay   Scalar      value;
1444fa0d573SSatish Balay   int         ierr,i,j,row,col;
1454fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1464fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1474fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
14857b952d6SSatish Balay 
149*80c1aa95SSatish Balay   Mat A = baij->A;
150*80c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
151*80c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
152*80c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
153*80c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
154*80c1aa95SSatish Balay   int          ridx,cidx,bs2=a->bs2;
155*80c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
156*80c1aa95SSatish Balay 
1572c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
15857b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
159e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
16057b952d6SSatish Balay   }
1612c3acbe9SBarry Smith #endif
16257b952d6SSatish Balay   baij->insertmode = addv;
16357b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
164639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
165e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
166e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
167639f9d9dSBarry Smith #endif
16857b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
16957b952d6SSatish Balay       row = im[i] - rstart_orig;
17057b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
17157b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
17257b952d6SSatish Balay           col = in[j] - cstart_orig;
17357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
174*80c1aa95SSatish Balay           MatSetValues_SeqBAIJ_A_Private(baij->A,row,col,value);
175*80c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
17657b952d6SSatish Balay         }
177639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
178e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
179e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
180639f9d9dSBarry Smith #endif
18157b952d6SSatish Balay         else {
18257b952d6SSatish Balay           if (mat->was_assembled) {
183905e6a2fSBarry Smith             if (!baij->colmap) {
184905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
185905e6a2fSBarry Smith             }
186905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
18757b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
18857b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
18957b952d6SSatish Balay               col =  in[j];
19057b952d6SSatish Balay             }
19157b952d6SSatish Balay           }
19257b952d6SSatish Balay           else col = in[j];
19357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
194639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
19557b952d6SSatish Balay         }
19657b952d6SSatish Balay       }
19757b952d6SSatish Balay     }
19857b952d6SSatish Balay     else {
19990f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
20157b952d6SSatish Balay       }
20257b952d6SSatish Balay       else {
20390f02eecSBarry Smith         if (!baij->donotstash) {
20457b952d6SSatish Balay           row = im[i];
20557b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
20657b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
20757b952d6SSatish Balay           }
20857b952d6SSatish Balay         }
20957b952d6SSatish Balay       }
21057b952d6SSatish Balay     }
21190f02eecSBarry Smith   }
21257b952d6SSatish Balay   return 0;
21357b952d6SSatish Balay }
21457b952d6SSatish Balay 
2155615d1e5SSatish Balay #undef __FUNC__
2165615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
217d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
218d6de1c52SSatish Balay {
219d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
220d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
221d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
222d6de1c52SSatish Balay 
223d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
224e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
225e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
226d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
227d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
228d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
229e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
230e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
231d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
232d6de1c52SSatish Balay           col = idxn[j] - bscstart;
233d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
234d6de1c52SSatish Balay         }
235d6de1c52SSatish Balay         else {
236905e6a2fSBarry Smith           if (!baij->colmap) {
237905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238905e6a2fSBarry Smith           }
239e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
240dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
241d9d09a02SSatish Balay           else {
242dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
243d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
244d6de1c52SSatish Balay           }
245d6de1c52SSatish Balay         }
246d6de1c52SSatish Balay       }
247d9d09a02SSatish Balay     }
248d6de1c52SSatish Balay     else {
249e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
250d6de1c52SSatish Balay     }
251d6de1c52SSatish Balay   }
252d6de1c52SSatish Balay   return 0;
253d6de1c52SSatish Balay }
254d6de1c52SSatish Balay 
2555615d1e5SSatish Balay #undef __FUNC__
2565615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
257d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
258d6de1c52SSatish Balay {
259d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
260d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
261acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
262d6de1c52SSatish Balay   double     sum = 0.0;
263d6de1c52SSatish Balay   Scalar     *v;
264d6de1c52SSatish Balay 
265d6de1c52SSatish Balay   if (baij->size == 1) {
266d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
267d6de1c52SSatish Balay   } else {
268d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
269d6de1c52SSatish Balay       v = amat->a;
270d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
271d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
272d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
273d6de1c52SSatish Balay #else
274d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
275d6de1c52SSatish Balay #endif
276d6de1c52SSatish Balay       }
277d6de1c52SSatish Balay       v = bmat->a;
278d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
279d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
280d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
281d6de1c52SSatish Balay #else
282d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
283d6de1c52SSatish Balay #endif
284d6de1c52SSatish Balay       }
285d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
286d6de1c52SSatish Balay       *norm = sqrt(*norm);
287d6de1c52SSatish Balay     }
288acdf5bf4SSatish Balay     else
289e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
290d6de1c52SSatish Balay   }
291d6de1c52SSatish Balay   return 0;
292d6de1c52SSatish Balay }
29357b952d6SSatish Balay 
2945615d1e5SSatish Balay #undef __FUNC__
2955615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
29657b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
29757b952d6SSatish Balay {
29857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
29957b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
30057b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
30157b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
30257b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
30357b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
30457b952d6SSatish Balay   InsertMode  addv;
30557b952d6SSatish Balay   Scalar      *rvalues,*svalues;
30657b952d6SSatish Balay 
30757b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
30857b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
30957b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
310e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
31157b952d6SSatish Balay   }
31257b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
31357b952d6SSatish Balay 
31457b952d6SSatish Balay   /*  first count number of contributors to each processor */
31557b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
31657b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
31757b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
31857b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
31957b952d6SSatish Balay     idx = baij->stash.idx[i];
32057b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
32157b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
32257b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
32357b952d6SSatish Balay       }
32457b952d6SSatish Balay     }
32557b952d6SSatish Balay   }
32657b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
32757b952d6SSatish Balay 
32857b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
32957b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
33057b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
33157b952d6SSatish Balay   nreceives = work[rank];
33257b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
33357b952d6SSatish Balay   nmax = work[rank];
33457b952d6SSatish Balay   PetscFree(work);
33557b952d6SSatish Balay 
33657b952d6SSatish Balay   /* post receives:
33757b952d6SSatish Balay        1) each message will consist of ordered pairs
33857b952d6SSatish Balay      (global index,value) we store the global index as a double
33957b952d6SSatish Balay      to simplify the message passing.
34057b952d6SSatish Balay        2) since we don't know how long each individual message is we
34157b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
34257b952d6SSatish Balay      this is a lot of wasted space.
34357b952d6SSatish Balay 
34457b952d6SSatish Balay 
34557b952d6SSatish Balay        This could be done better.
34657b952d6SSatish Balay   */
34757b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
34857b952d6SSatish Balay   CHKPTRQ(rvalues);
34957b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
35057b952d6SSatish Balay   CHKPTRQ(recv_waits);
35157b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
35257b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
35357b952d6SSatish Balay               comm,recv_waits+i);
35457b952d6SSatish Balay   }
35557b952d6SSatish Balay 
35657b952d6SSatish Balay   /* do sends:
35757b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
35857b952d6SSatish Balay          the ith processor
35957b952d6SSatish Balay   */
36057b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
36157b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36257b952d6SSatish Balay   CHKPTRQ(send_waits);
36357b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
36457b952d6SSatish Balay   starts[0] = 0;
36557b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
36657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
36757b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
36857b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
36957b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
37057b952d6SSatish Balay   }
37157b952d6SSatish Balay   PetscFree(owner);
37257b952d6SSatish Balay   starts[0] = 0;
37357b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
37457b952d6SSatish Balay   count = 0;
37557b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
37657b952d6SSatish Balay     if (procs[i]) {
37757b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
37857b952d6SSatish Balay                 comm,send_waits+count++);
37957b952d6SSatish Balay     }
38057b952d6SSatish Balay   }
38157b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
38257b952d6SSatish Balay 
38357b952d6SSatish Balay   /* Free cache space */
384d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
38557b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
38657b952d6SSatish Balay 
38757b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
38857b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
38957b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
39057b952d6SSatish Balay   baij->rmax       = nmax;
39157b952d6SSatish Balay 
39257b952d6SSatish Balay   return 0;
39357b952d6SSatish Balay }
39457b952d6SSatish Balay 
39557b952d6SSatish Balay 
3965615d1e5SSatish Balay #undef __FUNC__
3975615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
39857b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
39957b952d6SSatish Balay {
40057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
40157b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
40257b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
40357b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
40457b952d6SSatish Balay   Scalar      *values,val;
40557b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
40657b952d6SSatish Balay 
40757b952d6SSatish Balay   /*  wait on receives */
40857b952d6SSatish Balay   while (count) {
40957b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
41057b952d6SSatish Balay     /* unpack receives into our local space */
41157b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
41257b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
41357b952d6SSatish Balay     n = n/3;
41457b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
41557b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
41657b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
41757b952d6SSatish Balay       val = values[3*i+2];
41857b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
41957b952d6SSatish Balay         col -= baij->cstart*bs;
42057b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
42157b952d6SSatish Balay       }
42257b952d6SSatish Balay       else {
42357b952d6SSatish Balay         if (mat->was_assembled) {
424905e6a2fSBarry Smith           if (!baij->colmap) {
425905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
426905e6a2fSBarry Smith           }
427905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
42857b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
42957b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
43057b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
43157b952d6SSatish Balay           }
43257b952d6SSatish Balay         }
43357b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
43457b952d6SSatish Balay       }
43557b952d6SSatish Balay     }
43657b952d6SSatish Balay     count--;
43757b952d6SSatish Balay   }
43857b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
43957b952d6SSatish Balay 
44057b952d6SSatish Balay   /* wait on sends */
44157b952d6SSatish Balay   if (baij->nsends) {
44257b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
44357b952d6SSatish Balay     CHKPTRQ(send_status);
44457b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
44557b952d6SSatish Balay     PetscFree(send_status);
44657b952d6SSatish Balay   }
44757b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
44857b952d6SSatish Balay 
44957b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
45057b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
45157b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
45257b952d6SSatish Balay 
45357b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
45457b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
45557b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
45657b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
45757b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
45857b952d6SSatish Balay   }
45957b952d6SSatish Balay 
4606d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
46157b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
46257b952d6SSatish Balay   }
46357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
46457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
46557b952d6SSatish Balay 
46657b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
46757b952d6SSatish Balay   return 0;
46857b952d6SSatish Balay }
46957b952d6SSatish Balay 
4705615d1e5SSatish Balay #undef __FUNC__
4715615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
47257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
47357b952d6SSatish Balay {
47457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
47557b952d6SSatish Balay   int          ierr;
47657b952d6SSatish Balay 
47757b952d6SSatish Balay   if (baij->size == 1) {
47857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
47957b952d6SSatish Balay   }
480e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
48157b952d6SSatish Balay   return 0;
48257b952d6SSatish Balay }
48357b952d6SSatish Balay 
4845615d1e5SSatish Balay #undef __FUNC__
4855615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
48657b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
48757b952d6SSatish Balay {
48857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
489cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
49057b952d6SSatish Balay   FILE         *fd;
49157b952d6SSatish Balay   ViewerType   vtype;
49257b952d6SSatish Balay 
49357b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
49457b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
49557b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
496639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
4974e220ebcSLois Curfman McInnes       MatInfo info;
49857b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
49957b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
5004e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
50157b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
50257b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
5034e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
5044e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
5054e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
5064e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
5074e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
5084e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
50957b952d6SSatish Balay       fflush(fd);
51057b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
51157b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
51257b952d6SSatish Balay       return 0;
51357b952d6SSatish Balay     }
514639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
515bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
51657b952d6SSatish Balay       return 0;
51757b952d6SSatish Balay     }
51857b952d6SSatish Balay   }
51957b952d6SSatish Balay 
52057b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
52157b952d6SSatish Balay     Draw       draw;
52257b952d6SSatish Balay     PetscTruth isnull;
52357b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
52457b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
52557b952d6SSatish Balay   }
52657b952d6SSatish Balay 
52757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
52857b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
52957b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
53057b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
53157b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
53257b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
53357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
53457b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
53557b952d6SSatish Balay     fflush(fd);
53657b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
53757b952d6SSatish Balay   }
53857b952d6SSatish Balay   else {
53957b952d6SSatish Balay     int size = baij->size;
54057b952d6SSatish Balay     rank = baij->rank;
54157b952d6SSatish Balay     if (size == 1) {
54257b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
54357b952d6SSatish Balay     }
54457b952d6SSatish Balay     else {
54557b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
54657b952d6SSatish Balay       Mat         A;
54757b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
54857b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
54957b952d6SSatish Balay       int         mbs=baij->mbs;
55057b952d6SSatish Balay       Scalar      *a;
55157b952d6SSatish Balay 
55257b952d6SSatish Balay       if (!rank) {
553cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
55457b952d6SSatish Balay         CHKERRQ(ierr);
55557b952d6SSatish Balay       }
55657b952d6SSatish Balay       else {
557cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
55857b952d6SSatish Balay         CHKERRQ(ierr);
55957b952d6SSatish Balay       }
56057b952d6SSatish Balay       PLogObjectParent(mat,A);
56157b952d6SSatish Balay 
56257b952d6SSatish Balay       /* copy over the A part */
56357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
56457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
56557b952d6SSatish Balay       row = baij->rstart;
56657b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
56757b952d6SSatish Balay 
56857b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
56957b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
57057b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
57157b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
57257b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
57357b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
574cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
575cee3aa6bSSatish Balay             col++; a += bs;
57657b952d6SSatish Balay           }
57757b952d6SSatish Balay         }
57857b952d6SSatish Balay       }
57957b952d6SSatish Balay       /* copy over the B part */
58057b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
58157b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
58257b952d6SSatish Balay       row = baij->rstart*bs;
58357b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
58457b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
58557b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
58657b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
58757b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
58857b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
589cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
590cee3aa6bSSatish Balay             col++; a += bs;
59157b952d6SSatish Balay           }
59257b952d6SSatish Balay         }
59357b952d6SSatish Balay       }
59457b952d6SSatish Balay       PetscFree(rvals);
5956d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5966d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
59757b952d6SSatish Balay       if (!rank) {
59857b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
59957b952d6SSatish Balay       }
60057b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
60157b952d6SSatish Balay     }
60257b952d6SSatish Balay   }
60357b952d6SSatish Balay   return 0;
60457b952d6SSatish Balay }
60557b952d6SSatish Balay 
60657b952d6SSatish Balay 
60757b952d6SSatish Balay 
6085615d1e5SSatish Balay #undef __FUNC__
6095615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
61057b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
61157b952d6SSatish Balay {
61257b952d6SSatish Balay   Mat         mat = (Mat) obj;
61357b952d6SSatish Balay   int         ierr;
61457b952d6SSatish Balay   ViewerType  vtype;
61557b952d6SSatish Balay 
61657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
61757b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
61857b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
61957b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
62057b952d6SSatish Balay   }
62157b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
62257b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
62357b952d6SSatish Balay   }
62457b952d6SSatish Balay   return 0;
62557b952d6SSatish Balay }
62657b952d6SSatish Balay 
6275615d1e5SSatish Balay #undef __FUNC__
6285615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
62979bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
63079bdfe76SSatish Balay {
63179bdfe76SSatish Balay   Mat         mat = (Mat) obj;
63279bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
63379bdfe76SSatish Balay   int         ierr;
63479bdfe76SSatish Balay 
63579bdfe76SSatish Balay #if defined(PETSC_LOG)
63679bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
63779bdfe76SSatish Balay #endif
63879bdfe76SSatish Balay 
63979bdfe76SSatish Balay   PetscFree(baij->rowners);
64079bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
64179bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
64279bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
64379bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
64479bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
64579bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
64679bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
64779bdfe76SSatish Balay   PetscFree(baij);
64890f02eecSBarry Smith   if (mat->mapping) {
64990f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
65090f02eecSBarry Smith   }
65179bdfe76SSatish Balay   PLogObjectDestroy(mat);
65279bdfe76SSatish Balay   PetscHeaderDestroy(mat);
65379bdfe76SSatish Balay   return 0;
65479bdfe76SSatish Balay }
65579bdfe76SSatish Balay 
6565615d1e5SSatish Balay #undef __FUNC__
6575615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
658cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
659cee3aa6bSSatish Balay {
660cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
66147b4a8eaSLois Curfman McInnes   int         ierr, nt;
662cee3aa6bSSatish Balay 
663c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
66447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
665e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
66647b4a8eaSLois Curfman McInnes   }
667c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
66847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
669e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
67047b4a8eaSLois Curfman McInnes   }
67143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
672cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
67343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
674cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
67543a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
676cee3aa6bSSatish Balay   return 0;
677cee3aa6bSSatish Balay }
678cee3aa6bSSatish Balay 
6795615d1e5SSatish Balay #undef __FUNC__
6805615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
681cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
682cee3aa6bSSatish Balay {
683cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
684cee3aa6bSSatish Balay   int        ierr;
68543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
686cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
68743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
688cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
689cee3aa6bSSatish Balay   return 0;
690cee3aa6bSSatish Balay }
691cee3aa6bSSatish Balay 
6925615d1e5SSatish Balay #undef __FUNC__
6935615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
694cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
695cee3aa6bSSatish Balay {
696cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
697cee3aa6bSSatish Balay   int        ierr;
698cee3aa6bSSatish Balay 
699cee3aa6bSSatish Balay   /* do nondiagonal part */
700cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
701cee3aa6bSSatish Balay   /* send it on its way */
702537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
703cee3aa6bSSatish Balay   /* do local part */
704cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
705cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
706cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
707cee3aa6bSSatish Balay   /* but is not perhaps always true. */
708639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
709cee3aa6bSSatish Balay   return 0;
710cee3aa6bSSatish Balay }
711cee3aa6bSSatish Balay 
7125615d1e5SSatish Balay #undef __FUNC__
7135615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
714cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
715cee3aa6bSSatish Balay {
716cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
717cee3aa6bSSatish Balay   int        ierr;
718cee3aa6bSSatish Balay 
719cee3aa6bSSatish Balay   /* do nondiagonal part */
720cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
721cee3aa6bSSatish Balay   /* send it on its way */
722537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
723cee3aa6bSSatish Balay   /* do local part */
724cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
725cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
726cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
727cee3aa6bSSatish Balay   /* but is not perhaps always true. */
728537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
729cee3aa6bSSatish Balay   return 0;
730cee3aa6bSSatish Balay }
731cee3aa6bSSatish Balay 
732cee3aa6bSSatish Balay /*
733cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
734cee3aa6bSSatish Balay    diagonal block
735cee3aa6bSSatish Balay */
7365615d1e5SSatish Balay #undef __FUNC__
7375615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
738cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
739cee3aa6bSSatish Balay {
740cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
741cee3aa6bSSatish Balay   if (a->M != a->N)
742e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
743cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
744cee3aa6bSSatish Balay }
745cee3aa6bSSatish Balay 
7465615d1e5SSatish Balay #undef __FUNC__
7475615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
748cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
749cee3aa6bSSatish Balay {
750cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
751cee3aa6bSSatish Balay   int        ierr;
752cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
753cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
754cee3aa6bSSatish Balay   return 0;
755cee3aa6bSSatish Balay }
756026e39d0SSatish Balay 
7575615d1e5SSatish Balay #undef __FUNC__
7585615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
75957b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
76057b952d6SSatish Balay {
76157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
76257b952d6SSatish Balay   *m = mat->M; *n = mat->N;
76357b952d6SSatish Balay   return 0;
76457b952d6SSatish Balay }
76557b952d6SSatish Balay 
7665615d1e5SSatish Balay #undef __FUNC__
7675615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
76857b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
76957b952d6SSatish Balay {
77057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
77157b952d6SSatish Balay   *m = mat->m; *n = mat->N;
77257b952d6SSatish Balay   return 0;
77357b952d6SSatish Balay }
77457b952d6SSatish Balay 
7755615d1e5SSatish Balay #undef __FUNC__
7765615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
77757b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
77857b952d6SSatish Balay {
77957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
78057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
78157b952d6SSatish Balay   return 0;
78257b952d6SSatish Balay }
78357b952d6SSatish Balay 
784acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
785acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
786acdf5bf4SSatish Balay 
7875615d1e5SSatish Balay #undef __FUNC__
7885615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
789acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
790acdf5bf4SSatish Balay {
791acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
792acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
793acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
794d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
795d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
796acdf5bf4SSatish Balay 
797e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
798acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
799acdf5bf4SSatish Balay 
800acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
801acdf5bf4SSatish Balay     /*
802acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
803acdf5bf4SSatish Balay     */
804acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
805bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
806bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
807acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
808acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
809acdf5bf4SSatish Balay     }
810acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
811acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
812acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
813acdf5bf4SSatish Balay   }
814acdf5bf4SSatish Balay 
815acdf5bf4SSatish Balay 
816e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
817d9d09a02SSatish Balay   lrow = row - brstart;
818acdf5bf4SSatish Balay 
819acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
820acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
821acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
822acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
823acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
824acdf5bf4SSatish Balay   nztot = nzA + nzB;
825acdf5bf4SSatish Balay 
826acdf5bf4SSatish Balay   cmap  = mat->garray;
827acdf5bf4SSatish Balay   if (v  || idx) {
828acdf5bf4SSatish Balay     if (nztot) {
829acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
830acdf5bf4SSatish Balay       int imark = -1;
831acdf5bf4SSatish Balay       if (v) {
832acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
833acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
834d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
835acdf5bf4SSatish Balay           else break;
836acdf5bf4SSatish Balay         }
837acdf5bf4SSatish Balay         imark = i;
838acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
839acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
840acdf5bf4SSatish Balay       }
841acdf5bf4SSatish Balay       if (idx) {
842acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
843acdf5bf4SSatish Balay         if (imark > -1) {
844acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
845bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
846acdf5bf4SSatish Balay           }
847acdf5bf4SSatish Balay         } else {
848acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
849d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
850d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
851acdf5bf4SSatish Balay             else break;
852acdf5bf4SSatish Balay           }
853acdf5bf4SSatish Balay           imark = i;
854acdf5bf4SSatish Balay         }
855d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
856d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
857acdf5bf4SSatish Balay       }
858acdf5bf4SSatish Balay     }
859d212a18eSSatish Balay     else {
860d212a18eSSatish Balay       if (idx) *idx = 0;
861d212a18eSSatish Balay       if (v)   *v   = 0;
862d212a18eSSatish Balay     }
863acdf5bf4SSatish Balay   }
864acdf5bf4SSatish Balay   *nz = nztot;
865acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
866acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
867acdf5bf4SSatish Balay   return 0;
868acdf5bf4SSatish Balay }
869acdf5bf4SSatish Balay 
8705615d1e5SSatish Balay #undef __FUNC__
8715615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
872acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
873acdf5bf4SSatish Balay {
874acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
875acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
876e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
877acdf5bf4SSatish Balay   }
878acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
879acdf5bf4SSatish Balay   return 0;
880acdf5bf4SSatish Balay }
881acdf5bf4SSatish Balay 
8825615d1e5SSatish Balay #undef __FUNC__
8835615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
8845a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
8855a838052SSatish Balay {
8865a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
8875a838052SSatish Balay   *bs = baij->bs;
8885a838052SSatish Balay   return 0;
8895a838052SSatish Balay }
8905a838052SSatish Balay 
8915615d1e5SSatish Balay #undef __FUNC__
8925615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
89358667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
89458667388SSatish Balay {
89558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
89658667388SSatish Balay   int         ierr;
89758667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
89858667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
89958667388SSatish Balay   return 0;
90058667388SSatish Balay }
9010ac07820SSatish Balay 
9025615d1e5SSatish Balay #undef __FUNC__
9035615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
9044e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
9050ac07820SSatish Balay {
9064e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
9074e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
9087d57db60SLois Curfman McInnes   int         ierr;
9097d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
9100ac07820SSatish Balay 
9114e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
9124e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
9134e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
9144e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
9154e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
9164e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9174e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
9184e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9194e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
9200ac07820SSatish Balay   if (flag == MAT_LOCAL) {
9214e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9224e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9234e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9244e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9254e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
9260ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
9270ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
9284e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9294e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9304e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9314e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9324e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9330ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
9340ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
9354e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9364e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9374e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9384e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9394e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9400ac07820SSatish Balay   }
9414e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9424e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9434e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9440ac07820SSatish Balay   return 0;
9450ac07820SSatish Balay }
9460ac07820SSatish Balay 
9475615d1e5SSatish Balay #undef __FUNC__
9485615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
94958667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
95058667388SSatish Balay {
95158667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
95258667388SSatish Balay 
95358667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
95458667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
9556da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
956b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
957b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
958b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
959b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
960aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
96158667388SSatish Balay         MatSetOption(a->A,op);
96258667388SSatish Balay         MatSetOption(a->B,op);
963b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
9646da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
96558667388SSatish Balay              op == MAT_SYMMETRIC ||
96658667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
96758667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
96858667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
96958667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
97058667388SSatish Balay     a->roworiented = 0;
97158667388SSatish Balay     MatSetOption(a->A,op);
97258667388SSatish Balay     MatSetOption(a->B,op);
97390f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
97490f02eecSBarry Smith     a->donotstash = 1;
97590f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
976e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
97758667388SSatish Balay   else
978e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
97958667388SSatish Balay   return 0;
98058667388SSatish Balay }
98158667388SSatish Balay 
9825615d1e5SSatish Balay #undef __FUNC__
9835615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
9840ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
9850ac07820SSatish Balay {
9860ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
9870ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
9880ac07820SSatish Balay   Mat        B;
9890ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
9900ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
9910ac07820SSatish Balay   Scalar     *a;
9920ac07820SSatish Balay 
9930ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
994e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
9950ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
9960ac07820SSatish Balay   CHKERRQ(ierr);
9970ac07820SSatish Balay 
9980ac07820SSatish Balay   /* copy over the A part */
9990ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
10000ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10010ac07820SSatish Balay   row = baij->rstart;
10020ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
10030ac07820SSatish Balay 
10040ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10050ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10060ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10070ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10080ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
10090ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10100ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10110ac07820SSatish Balay         col++; a += bs;
10120ac07820SSatish Balay       }
10130ac07820SSatish Balay     }
10140ac07820SSatish Balay   }
10150ac07820SSatish Balay   /* copy over the B part */
10160ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
10170ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10180ac07820SSatish Balay   row = baij->rstart*bs;
10190ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10200ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10210ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10220ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10230ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
10240ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10250ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10260ac07820SSatish Balay         col++; a += bs;
10270ac07820SSatish Balay       }
10280ac07820SSatish Balay     }
10290ac07820SSatish Balay   }
10300ac07820SSatish Balay   PetscFree(rvals);
10310ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10320ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10330ac07820SSatish Balay 
10340ac07820SSatish Balay   if (matout != PETSC_NULL) {
10350ac07820SSatish Balay     *matout = B;
10360ac07820SSatish Balay   } else {
10370ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
10380ac07820SSatish Balay     PetscFree(baij->rowners);
10390ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
10400ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
10410ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
10420ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
10430ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
10440ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
10450ac07820SSatish Balay     PetscFree(baij);
10460ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
10470ac07820SSatish Balay     PetscHeaderDestroy(B);
10480ac07820SSatish Balay   }
10490ac07820SSatish Balay   return 0;
10500ac07820SSatish Balay }
10510e95ebc0SSatish Balay 
10525615d1e5SSatish Balay #undef __FUNC__
10535615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
10540e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
10550e95ebc0SSatish Balay {
10560e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
10570e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
10580e95ebc0SSatish Balay   int ierr,s1,s2,s3;
10590e95ebc0SSatish Balay 
10600e95ebc0SSatish Balay   if (ll)  {
10610e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
10620e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1063e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
10640e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
10650e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
10660e95ebc0SSatish Balay   }
1067e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
10680e95ebc0SSatish Balay   return 0;
10690e95ebc0SSatish Balay }
10700e95ebc0SSatish Balay 
10710ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
10720ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
10730ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
10740ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
10750ac07820SSatish Balay    routine.
10760ac07820SSatish Balay */
10775615d1e5SSatish Balay #undef __FUNC__
10785615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
10790ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
10800ac07820SSatish Balay {
10810ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
10820ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
10830ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
10840ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
10850ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
10860ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
10870ac07820SSatish Balay   MPI_Comm       comm = A->comm;
10880ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
10890ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
10900ac07820SSatish Balay   IS             istmp;
10910ac07820SSatish Balay 
10920ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
10930ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
10940ac07820SSatish Balay 
10950ac07820SSatish Balay   /*  first count number of contributors to each processor */
10960ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
10970ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
10980ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
10990ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11000ac07820SSatish Balay     idx = rows[i];
11010ac07820SSatish Balay     found = 0;
11020ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
11030ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
11040ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
11050ac07820SSatish Balay       }
11060ac07820SSatish Balay     }
1107e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
11080ac07820SSatish Balay   }
11090ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
11100ac07820SSatish Balay 
11110ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
11120ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11130ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
11140ac07820SSatish Balay   nrecvs = work[rank];
11150ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
11160ac07820SSatish Balay   nmax = work[rank];
11170ac07820SSatish Balay   PetscFree(work);
11180ac07820SSatish Balay 
11190ac07820SSatish Balay   /* post receives:   */
11200ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
11210ac07820SSatish Balay   CHKPTRQ(rvalues);
11220ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
11230ac07820SSatish Balay   CHKPTRQ(recv_waits);
11240ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11250ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
11260ac07820SSatish Balay   }
11270ac07820SSatish Balay 
11280ac07820SSatish Balay   /* do sends:
11290ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
11300ac07820SSatish Balay          the ith processor
11310ac07820SSatish Balay   */
11320ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
11330ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
11340ac07820SSatish Balay   CHKPTRQ(send_waits);
11350ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
11360ac07820SSatish Balay   starts[0] = 0;
11370ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11380ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11390ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
11400ac07820SSatish Balay   }
11410ac07820SSatish Balay   ISRestoreIndices(is,&rows);
11420ac07820SSatish Balay 
11430ac07820SSatish Balay   starts[0] = 0;
11440ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11450ac07820SSatish Balay   count = 0;
11460ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
11470ac07820SSatish Balay     if (procs[i]) {
11480ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
11490ac07820SSatish Balay     }
11500ac07820SSatish Balay   }
11510ac07820SSatish Balay   PetscFree(starts);
11520ac07820SSatish Balay 
11530ac07820SSatish Balay   base = owners[rank]*bs;
11540ac07820SSatish Balay 
11550ac07820SSatish Balay   /*  wait on receives */
11560ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
11570ac07820SSatish Balay   source = lens + nrecvs;
11580ac07820SSatish Balay   count  = nrecvs; slen = 0;
11590ac07820SSatish Balay   while (count) {
11600ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
11610ac07820SSatish Balay     /* unpack receives into our local space */
11620ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
11630ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
11640ac07820SSatish Balay     lens[imdex]  = n;
11650ac07820SSatish Balay     slen += n;
11660ac07820SSatish Balay     count--;
11670ac07820SSatish Balay   }
11680ac07820SSatish Balay   PetscFree(recv_waits);
11690ac07820SSatish Balay 
11700ac07820SSatish Balay   /* move the data into the send scatter */
11710ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
11720ac07820SSatish Balay   count = 0;
11730ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11740ac07820SSatish Balay     values = rvalues + i*nmax;
11750ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
11760ac07820SSatish Balay       lrows[count++] = values[j] - base;
11770ac07820SSatish Balay     }
11780ac07820SSatish Balay   }
11790ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
11800ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
11810ac07820SSatish Balay 
11820ac07820SSatish Balay   /* actually zap the local rows */
1183537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
11840ac07820SSatish Balay   PLogObjectParent(A,istmp);
11850ac07820SSatish Balay   PetscFree(lrows);
11860ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
11870ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
11880ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
11890ac07820SSatish Balay 
11900ac07820SSatish Balay   /* wait on sends */
11910ac07820SSatish Balay   if (nsends) {
11920ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
11930ac07820SSatish Balay     CHKPTRQ(send_status);
11940ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
11950ac07820SSatish Balay     PetscFree(send_status);
11960ac07820SSatish Balay   }
11970ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
11980ac07820SSatish Balay 
11990ac07820SSatish Balay   return 0;
12000ac07820SSatish Balay }
1201ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
12025615d1e5SSatish Balay #undef __FUNC__
12035615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1204ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1205ba4ca20aSSatish Balay {
1206ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1207ba4ca20aSSatish Balay 
1208ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1209ba4ca20aSSatish Balay   else return 0;
1210ba4ca20aSSatish Balay }
12110ac07820SSatish Balay 
12125615d1e5SSatish Balay #undef __FUNC__
12135615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1214bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A)
1215bb5a7306SBarry Smith {
1216bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1217bb5a7306SBarry Smith   int         ierr;
1218bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1219bb5a7306SBarry Smith   return 0;
1220bb5a7306SBarry Smith }
1221bb5a7306SBarry Smith 
12220ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
12230ac07820SSatish Balay 
122479bdfe76SSatish Balay /* -------------------------------------------------------------------*/
122579bdfe76SSatish Balay static struct _MatOps MatOps = {
1226bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
12274c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
12284c50302cSBarry Smith   0,0,0,0,
12290ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
12300e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
123158667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
12324c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
12334c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
12344c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1235905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1236d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1237ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1238bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1239bb5a7306SBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ};
124079bdfe76SSatish Balay 
124179bdfe76SSatish Balay 
12425615d1e5SSatish Balay #undef __FUNC__
12435615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
124479bdfe76SSatish Balay /*@C
124579bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
124679bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
124779bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
124879bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
124979bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
125079bdfe76SSatish Balay 
125179bdfe76SSatish Balay    Input Parameters:
125279bdfe76SSatish Balay .  comm - MPI communicator
125379bdfe76SSatish Balay .  bs   - size of blockk
125479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
125592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
125792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
125892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
126079bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
126192e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
126279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
126379bdfe76SSatish Balay            submatrix  (same for all local rows)
126492e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
126592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
126692e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
126792e8d321SLois Curfman McInnes            it is zero.
126892e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
126979bdfe76SSatish Balay            submatrix (same for all local rows).
127092e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
127192e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
127292e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
127379bdfe76SSatish Balay 
127479bdfe76SSatish Balay    Output Parameter:
127579bdfe76SSatish Balay .  A - the matrix
127679bdfe76SSatish Balay 
127779bdfe76SSatish Balay    Notes:
127879bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
127979bdfe76SSatish Balay    (possibly both).
128079bdfe76SSatish Balay 
128179bdfe76SSatish Balay    Storage Information:
128279bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
128379bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
128479bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
128579bdfe76SSatish Balay    local matrix (a rectangular submatrix).
128679bdfe76SSatish Balay 
128779bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
128879bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
128979bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
129079bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
129179bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
129279bdfe76SSatish Balay 
129379bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
129479bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
129579bdfe76SSatish Balay 
129679bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
129779bdfe76SSatish Balay $         -------------------
129879bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
129979bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
130079bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
130179bdfe76SSatish Balay $         -------------------
130279bdfe76SSatish Balay $
130379bdfe76SSatish Balay 
130479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
130579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
130679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
130757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
130879bdfe76SSatish Balay 
130979bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
131079bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
131179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
131292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
131392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13146da5968aSLois Curfman McInnes    matrices.
131579bdfe76SSatish Balay 
131692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
131779bdfe76SSatish Balay 
131879bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
131979bdfe76SSatish Balay @*/
132079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
132179bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
132279bdfe76SSatish Balay {
132379bdfe76SSatish Balay   Mat          B;
132479bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
13254c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
132679bdfe76SSatish Balay 
1327e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
132879bdfe76SSatish Balay   *A = 0;
132979bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
133079bdfe76SSatish Balay   PLogObjectCreate(B);
133179bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
133279bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
133379bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
13344c50302cSBarry Smith   MPI_Comm_size(comm,&size);
13354c50302cSBarry Smith   if (size == 1) {
13364c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
13374c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
13384c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
13394c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
13404c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
13414c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
13424c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
13434c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
13444c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
13454c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
13464c50302cSBarry Smith   }
13474c50302cSBarry Smith 
134879bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
134979bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
135090f02eecSBarry Smith   B->mapping    = 0;
135179bdfe76SSatish Balay   B->factor     = 0;
135279bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
135379bdfe76SSatish Balay 
135479bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
135579bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
135679bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
135779bdfe76SSatish Balay 
135879bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1359e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1360e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1361e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1362cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1363cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
136479bdfe76SSatish Balay 
136579bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
136679bdfe76SSatish Balay     work[0] = m; work[1] = n;
136779bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
136879bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
136979bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
137079bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
137179bdfe76SSatish Balay   }
137279bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
137379bdfe76SSatish Balay     Mbs = M/bs;
1374e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
137579bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
137679bdfe76SSatish Balay     m   = mbs*bs;
137779bdfe76SSatish Balay   }
137879bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
137979bdfe76SSatish Balay     Nbs = N/bs;
1380e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
138179bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
138279bdfe76SSatish Balay     n   = nbs*bs;
138379bdfe76SSatish Balay   }
1384e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
138579bdfe76SSatish Balay 
138679bdfe76SSatish Balay   b->m = m; B->m = m;
138779bdfe76SSatish Balay   b->n = n; B->n = n;
138879bdfe76SSatish Balay   b->N = N; B->N = N;
138979bdfe76SSatish Balay   b->M = M; B->M = M;
139079bdfe76SSatish Balay   b->bs  = bs;
139179bdfe76SSatish Balay   b->bs2 = bs*bs;
139279bdfe76SSatish Balay   b->mbs = mbs;
139379bdfe76SSatish Balay   b->nbs = nbs;
139479bdfe76SSatish Balay   b->Mbs = Mbs;
139579bdfe76SSatish Balay   b->Nbs = Nbs;
139679bdfe76SSatish Balay 
139779bdfe76SSatish Balay   /* build local table of row and column ownerships */
139879bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
139979bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14000ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
140179bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
140279bdfe76SSatish Balay   b->rowners[0] = 0;
140379bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
140479bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
140579bdfe76SSatish Balay   }
140679bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
140779bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
14084fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
14094fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
14104fa0d573SSatish Balay 
141179bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
141279bdfe76SSatish Balay   b->cowners[0] = 0;
141379bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
141479bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
141579bdfe76SSatish Balay   }
141679bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
141779bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
14184fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
14194fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
142079bdfe76SSatish Balay 
142179bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
142279bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
142379bdfe76SSatish Balay   PLogObjectParent(B,b->A);
142479bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
142579bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
142679bdfe76SSatish Balay   PLogObjectParent(B,b->B);
142779bdfe76SSatish Balay 
142879bdfe76SSatish Balay   /* build cache for off array entries formed */
142979bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
143090f02eecSBarry Smith   b->donotstash  = 0;
143179bdfe76SSatish Balay   b->colmap      = 0;
143279bdfe76SSatish Balay   b->garray      = 0;
143379bdfe76SSatish Balay   b->roworiented = 1;
143479bdfe76SSatish Balay 
143579bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
143679bdfe76SSatish Balay   b->lvec      = 0;
143779bdfe76SSatish Balay   b->Mvctx     = 0;
143879bdfe76SSatish Balay 
143979bdfe76SSatish Balay   /* stuff for MatGetRow() */
144079bdfe76SSatish Balay   b->rowindices   = 0;
144179bdfe76SSatish Balay   b->rowvalues    = 0;
144279bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
144379bdfe76SSatish Balay 
144479bdfe76SSatish Balay   *A = B;
144579bdfe76SSatish Balay   return 0;
144679bdfe76SSatish Balay }
1447026e39d0SSatish Balay 
14485615d1e5SSatish Balay #undef __FUNC__
14495615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
14500ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
14510ac07820SSatish Balay {
14520ac07820SSatish Balay   Mat        mat;
14530ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
14540ac07820SSatish Balay   int        ierr, len=0, flg;
14550ac07820SSatish Balay 
14560ac07820SSatish Balay   *newmat       = 0;
14570ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
14580ac07820SSatish Balay   PLogObjectCreate(mat);
14590ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
14600ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
14610ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
14620ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
14630ac07820SSatish Balay   mat->factor     = matin->factor;
14640ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
14650ac07820SSatish Balay 
14660ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
14670ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
14680ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
14690ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
14700ac07820SSatish Balay 
14710ac07820SSatish Balay   a->bs  = oldmat->bs;
14720ac07820SSatish Balay   a->bs2 = oldmat->bs2;
14730ac07820SSatish Balay   a->mbs = oldmat->mbs;
14740ac07820SSatish Balay   a->nbs = oldmat->nbs;
14750ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
14760ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
14770ac07820SSatish Balay 
14780ac07820SSatish Balay   a->rstart       = oldmat->rstart;
14790ac07820SSatish Balay   a->rend         = oldmat->rend;
14800ac07820SSatish Balay   a->cstart       = oldmat->cstart;
14810ac07820SSatish Balay   a->cend         = oldmat->cend;
14820ac07820SSatish Balay   a->size         = oldmat->size;
14830ac07820SSatish Balay   a->rank         = oldmat->rank;
14840ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
14850ac07820SSatish Balay   a->rowvalues    = 0;
14860ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
14870ac07820SSatish Balay 
14880ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
14890ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14900ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
14910ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
14920ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14930ac07820SSatish Balay   if (oldmat->colmap) {
14940ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
14950ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
14960ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
14970ac07820SSatish Balay   } else a->colmap = 0;
14980ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
14990ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
15000ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
15010ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
15020ac07820SSatish Balay   } else a->garray = 0;
15030ac07820SSatish Balay 
15040ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
15050ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
15060ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
15070ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
15080ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
15090ac07820SSatish Balay   PLogObjectParent(mat,a->A);
15100ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
15110ac07820SSatish Balay   PLogObjectParent(mat,a->B);
15120ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
15130ac07820SSatish Balay   if (flg) {
15140ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
15150ac07820SSatish Balay   }
15160ac07820SSatish Balay   *newmat = mat;
15170ac07820SSatish Balay   return 0;
15180ac07820SSatish Balay }
151957b952d6SSatish Balay 
152057b952d6SSatish Balay #include "sys.h"
152157b952d6SSatish Balay 
15225615d1e5SSatish Balay #undef __FUNC__
15235615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
152457b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
152557b952d6SSatish Balay {
152657b952d6SSatish Balay   Mat          A;
152757b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
152857b952d6SSatish Balay   Scalar       *vals,*buf;
152957b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
153057b952d6SSatish Balay   MPI_Status   status;
1531cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
153257b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
153357b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
153457b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
153557b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
153657b952d6SSatish Balay 
153757b952d6SSatish Balay 
153857b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
153957b952d6SSatish Balay   bs2  = bs*bs;
154057b952d6SSatish Balay 
154157b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
154257b952d6SSatish Balay   if (!rank) {
154357b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
154457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1545e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
154657b952d6SSatish Balay   }
154757b952d6SSatish Balay 
154857b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
154957b952d6SSatish Balay   M = header[1]; N = header[2];
155057b952d6SSatish Balay 
1551e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
155257b952d6SSatish Balay 
155357b952d6SSatish Balay   /*
155457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
155557b952d6SSatish Balay      divisible by the blocksize
155657b952d6SSatish Balay   */
155757b952d6SSatish Balay   Mbs        = M/bs;
155857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
155957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
156057b952d6SSatish Balay   else                  Mbs++;
156157b952d6SSatish Balay   if (extra_rows &&!rank) {
1562b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
156357b952d6SSatish Balay   }
1564537820f0SBarry Smith 
156557b952d6SSatish Balay   /* determine ownership of all rows */
156657b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
156757b952d6SSatish Balay   m   = mbs * bs;
1568cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1569cee3aa6bSSatish Balay   browners = rowners + size + 1;
157057b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
157157b952d6SSatish Balay   rowners[0] = 0;
1572cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1573cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
157457b952d6SSatish Balay   rstart = rowners[rank];
157557b952d6SSatish Balay   rend   = rowners[rank+1];
157657b952d6SSatish Balay 
157757b952d6SSatish Balay   /* distribute row lengths to all processors */
157857b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
157957b952d6SSatish Balay   if (!rank) {
158057b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
158157b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
158257b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
158357b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1584cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1585cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
158657b952d6SSatish Balay     PetscFree(sndcounts);
158757b952d6SSatish Balay   }
158857b952d6SSatish Balay   else {
158957b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
159057b952d6SSatish Balay   }
159157b952d6SSatish Balay 
159257b952d6SSatish Balay   if (!rank) {
159357b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
159457b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
159557b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
159657b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
159757b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
159857b952d6SSatish Balay         procsnz[i] += rowlengths[j];
159957b952d6SSatish Balay       }
160057b952d6SSatish Balay     }
160157b952d6SSatish Balay     PetscFree(rowlengths);
160257b952d6SSatish Balay 
160357b952d6SSatish Balay     /* determine max buffer needed and allocate it */
160457b952d6SSatish Balay     maxnz = 0;
160557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
160657b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
160757b952d6SSatish Balay     }
160857b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
160957b952d6SSatish Balay 
161057b952d6SSatish Balay     /* read in my part of the matrix column indices  */
161157b952d6SSatish Balay     nz = procsnz[0];
161257b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
161357b952d6SSatish Balay     mycols = ibuf;
1614cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
161557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1616cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1617cee3aa6bSSatish Balay 
161857b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
161957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
162057b952d6SSatish Balay       nz = procsnz[i];
162157b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
162257b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
162357b952d6SSatish Balay     }
162457b952d6SSatish Balay     /* read in the stuff for the last proc */
162557b952d6SSatish Balay     if ( size != 1 ) {
162657b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
162757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
162857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1629cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
163057b952d6SSatish Balay     }
163157b952d6SSatish Balay     PetscFree(cols);
163257b952d6SSatish Balay   }
163357b952d6SSatish Balay   else {
163457b952d6SSatish Balay     /* determine buffer space needed for message */
163557b952d6SSatish Balay     nz = 0;
163657b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
163757b952d6SSatish Balay       nz += locrowlens[i];
163857b952d6SSatish Balay     }
163957b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
164057b952d6SSatish Balay     mycols = ibuf;
164157b952d6SSatish Balay     /* receive message of column indices*/
164257b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
164357b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1644e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
164557b952d6SSatish Balay   }
164657b952d6SSatish Balay 
164757b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1648cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1649cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
165057b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1651cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
165257b952d6SSatish Balay   masked1 = mask    + Mbs;
165357b952d6SSatish Balay   masked2 = masked1 + Mbs;
165457b952d6SSatish Balay   rowcount = 0; nzcount = 0;
165557b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
165657b952d6SSatish Balay     dcount  = 0;
165757b952d6SSatish Balay     odcount = 0;
165857b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
165957b952d6SSatish Balay       kmax = locrowlens[rowcount];
166057b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
166157b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
166257b952d6SSatish Balay         if (!mask[tmp]) {
166357b952d6SSatish Balay           mask[tmp] = 1;
166457b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
166557b952d6SSatish Balay           else masked1[dcount++] = tmp;
166657b952d6SSatish Balay         }
166757b952d6SSatish Balay       }
166857b952d6SSatish Balay       rowcount++;
166957b952d6SSatish Balay     }
1670cee3aa6bSSatish Balay 
167157b952d6SSatish Balay     dlens[i]  = dcount;
167257b952d6SSatish Balay     odlens[i] = odcount;
1673cee3aa6bSSatish Balay 
167457b952d6SSatish Balay     /* zero out the mask elements we set */
167557b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
167657b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
167757b952d6SSatish Balay   }
1678cee3aa6bSSatish Balay 
167957b952d6SSatish Balay   /* create our matrix */
1680537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1681537820f0SBarry Smith          CHKERRQ(ierr);
168257b952d6SSatish Balay   A = *newmat;
16836d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
168457b952d6SSatish Balay 
168557b952d6SSatish Balay   if (!rank) {
168657b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
168757b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
168857b952d6SSatish Balay     nz = procsnz[0];
168957b952d6SSatish Balay     vals = buf;
1690cee3aa6bSSatish Balay     mycols = ibuf;
1691cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
169257b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1693cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1694537820f0SBarry Smith 
169557b952d6SSatish Balay     /* insert into matrix */
169657b952d6SSatish Balay     jj      = rstart*bs;
169757b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
169857b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
169957b952d6SSatish Balay       mycols += locrowlens[i];
170057b952d6SSatish Balay       vals   += locrowlens[i];
170157b952d6SSatish Balay       jj++;
170257b952d6SSatish Balay     }
170357b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
170457b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
170557b952d6SSatish Balay       nz = procsnz[i];
170657b952d6SSatish Balay       vals = buf;
170757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
170857b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
170957b952d6SSatish Balay     }
171057b952d6SSatish Balay     /* the last proc */
171157b952d6SSatish Balay     if ( size != 1 ){
171257b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1713cee3aa6bSSatish Balay       vals = buf;
171457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
171557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1716cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
171757b952d6SSatish Balay     }
171857b952d6SSatish Balay     PetscFree(procsnz);
171957b952d6SSatish Balay   }
172057b952d6SSatish Balay   else {
172157b952d6SSatish Balay     /* receive numeric values */
172257b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
172357b952d6SSatish Balay 
172457b952d6SSatish Balay     /* receive message of values*/
172557b952d6SSatish Balay     vals = buf;
1726cee3aa6bSSatish Balay     mycols = ibuf;
172757b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
172857b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1729e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
173057b952d6SSatish Balay 
173157b952d6SSatish Balay     /* insert into matrix */
173257b952d6SSatish Balay     jj      = rstart*bs;
1733cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
173457b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
173557b952d6SSatish Balay       mycols += locrowlens[i];
173657b952d6SSatish Balay       vals   += locrowlens[i];
173757b952d6SSatish Balay       jj++;
173857b952d6SSatish Balay     }
173957b952d6SSatish Balay   }
174057b952d6SSatish Balay   PetscFree(locrowlens);
174157b952d6SSatish Balay   PetscFree(buf);
174257b952d6SSatish Balay   PetscFree(ibuf);
174357b952d6SSatish Balay   PetscFree(rowners);
174457b952d6SSatish Balay   PetscFree(dlens);
1745cee3aa6bSSatish Balay   PetscFree(mask);
17466d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17476d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
174857b952d6SSatish Balay   return 0;
174957b952d6SSatish Balay }
175057b952d6SSatish Balay 
175157b952d6SSatish Balay 
1752