xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0520107fe0fb6fb2de36e5dc2c15a82b6ccd3e44)
179bdfe76SSatish Balay #ifndef lint
2*0520107fSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.49 1997/01/14 15:50:48 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
570f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay #include "draw.h"
957b952d6SSatish Balay #include "pinclude/pviewer.h"
1057b952d6SSatish Balay 
1157b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
1257b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
13d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
14d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
1593bc47c4SSatish Balay extern int MatLUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,Mat *);
1693bc47c4SSatish Balay extern int MatLUFactorNumeric_MPIBAIJ(Mat,Mat *);
1793bc47c4SSatish Balay extern int MatLUFactor_MPIBAIJ(Mat,IS,IS,double);
1893bc47c4SSatish Balay extern int MatSolve_MPIBAIJ(Mat,Vec,Vec);
1993bc47c4SSatish Balay extern int MatSolveAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2093bc47c4SSatish Balay extern int MatSolveTrans_MPIBAIJ(Mat,Vec,Vec);
2193bc47c4SSatish Balay extern int MatSolveTransAdd_MPIBAIJ(Mat,Vec,Vec,Vec);
2293bc47c4SSatish Balay extern int MatILUFactorSymbolic_MPIBAIJ(Mat,IS,IS,double,int,Mat *);
2357b952d6SSatish Balay 
243b2fbd54SBarry Smith 
25537820f0SBarry Smith /*
26537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2757b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2857b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2957b952d6SSatish Balay    length of colmap equals the global matrix length.
3057b952d6SSatish Balay */
315615d1e5SSatish Balay #undef __FUNC__
325615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
3357b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
3457b952d6SSatish Balay {
3557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3657b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
37928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
3857b952d6SSatish Balay 
3957b952d6SSatish Balay   baij->colmap = (int *) PetscMalloc(baij->Nbs*sizeof(int));CHKPTRQ(baij->colmap);
4057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
42928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4357b952d6SSatish Balay   return 0;
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
465615d1e5SSatish Balay #undef __FUNC__
475615d1e5SSatish Balay #define __FUNC__ "MatGetRowIJ_MPIBAIJ("
483b2fbd54SBarry Smith static int MatGetRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
493b2fbd54SBarry Smith                             PetscTruth *done)
5057b952d6SSatish Balay {
513b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
5257b952d6SSatish Balay   int         ierr;
533b2fbd54SBarry Smith   if (aij->size == 1) {
543b2fbd54SBarry Smith     ierr = MatGetRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
55e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
563b2fbd54SBarry Smith   return 0;
573b2fbd54SBarry Smith }
583b2fbd54SBarry Smith 
595615d1e5SSatish Balay #undef __FUNC__
605615d1e5SSatish Balay #define __FUNC__ "MatRestoreRowIJ_MPIBAIJ"
613b2fbd54SBarry Smith static int MatRestoreRowIJ_MPIBAIJ(Mat mat,int shift,PetscTruth symmetric,int *n,int **ia,int **ja,
623b2fbd54SBarry Smith                                 PetscTruth *done)
633b2fbd54SBarry Smith {
643b2fbd54SBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *) mat->data;
653b2fbd54SBarry Smith   int        ierr;
663b2fbd54SBarry Smith   if (aij->size == 1) {
673b2fbd54SBarry Smith     ierr = MatRestoreRowIJ(aij->A,shift,symmetric,n,ia,ja,done); CHKERRQ(ierr);
68e3372554SBarry Smith   } else SETERRQ(1,0,"not supported in parallel");
6957b952d6SSatish Balay   return 0;
7057b952d6SSatish Balay }
7180c1aa95SSatish Balay #define CHUNKSIZE  10
7280c1aa95SSatish Balay 
73eada6651SSatish Balay #define  MatSetValues_SeqBAIJ_A_Private( row, col, value) \
7480c1aa95SSatish Balay { \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay     brow = row/bs;  \
7780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
7880c1aa95SSatish Balay     rmax = imax[brow]; nrow = ailen[brow]; \
7980c1aa95SSatish Balay       bcol = col/bs; \
8080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
8180c1aa95SSatish Balay       for ( _i=0; _i<nrow; _i++ ) { \
8280c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
8380c1aa95SSatish Balay         if (rp[_i] == bcol) { \
8480c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
85eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
86eada6651SSatish Balay           else                    *bap  = value;  \
8780c1aa95SSatish Balay           goto _noinsert; \
8880c1aa95SSatish Balay         } \
8980c1aa95SSatish Balay       } \
90*0520107fSSatish Balay       if (a->nonew) goto _noinsert; \
9180c1aa95SSatish Balay       if (nrow >= rmax) { \
9280c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
9380c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
9480c1aa95SSatish Balay         Scalar *new_a; \
9580c1aa95SSatish Balay  \
9680c1aa95SSatish Balay         /* malloc new storage space */ \
9780c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
9880c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
9980c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
10080c1aa95SSatish Balay         new_i   = new_j + new_nz; \
10180c1aa95SSatish Balay  \
10280c1aa95SSatish Balay         /* copy over old data into new slots */ \
10380c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
10480c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
10580c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
10680c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
10780c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
10880c1aa95SSatish Balay                                                            len*sizeof(int)); \
10980c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
11080c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
11180c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
11280c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
11380c1aa95SSatish Balay         /* free up old matrix storage */ \
11480c1aa95SSatish Balay         PetscFree(a->a);  \
11580c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
11680c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
11780c1aa95SSatish Balay         a->singlemalloc = 1; \
11880c1aa95SSatish Balay  \
11980c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
12080c1aa95SSatish Balay         rmax = imax[brow] = imax[brow] + CHUNKSIZE; \
12180c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
12280c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
12380c1aa95SSatish Balay         a->reallocs++; \
12480c1aa95SSatish Balay         a->nz++; \
12580c1aa95SSatish Balay       } \
12680c1aa95SSatish Balay       N = nrow++ - 1;  \
12780c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
12880c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
12980c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
13080c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
13180c1aa95SSatish Balay       } \
13280c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
13380c1aa95SSatish Balay       rp[_i]                      = bcol;  \
13480c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
13580c1aa95SSatish Balay       _noinsert:; \
13680c1aa95SSatish Balay     ailen[brow] = nrow; \
13780c1aa95SSatish Balay }
13857b952d6SSatish Balay 
139639f9d9dSBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
1405615d1e5SSatish Balay #undef __FUNC__
1415615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
14257b952d6SSatish Balay static int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
14357b952d6SSatish Balay {
14457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
14557b952d6SSatish Balay   Scalar      value;
1464fa0d573SSatish Balay   int         ierr,i,j,row,col;
1474fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1484fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1494fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
15057b952d6SSatish Balay 
151eada6651SSatish Balay   /* Some Variables required in the macro */
15280c1aa95SSatish Balay   Mat         A = baij->A;
15380c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
15480c1aa95SSatish Balay   int         *rp,ii,nrow,_i,rmax,N;
15580c1aa95SSatish Balay   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
15680c1aa95SSatish Balay   int         *aj=a->j,brow,bcol;
15780c1aa95SSatish Balay   int         ridx,cidx,bs2=a->bs2;
15880c1aa95SSatish Balay   Scalar      *ap,*aa=a->a,*bap;
15980c1aa95SSatish Balay 
1602c3acbe9SBarry Smith #if defined(PETSC_BOPT_g)
16157b952d6SSatish Balay   if (baij->insertmode != NOT_SET_VALUES && baij->insertmode != addv) {
162e3372554SBarry Smith     SETERRQ(1,0,"Cannot mix inserts and adds");
16357b952d6SSatish Balay   }
1642c3acbe9SBarry Smith #endif
16557b952d6SSatish Balay   baij->insertmode = addv;
16657b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
167639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
168e3372554SBarry Smith     if (im[i] < 0) SETERRQ(1,0,"Negative row");
169e3372554SBarry Smith     if (im[i] >= baij->M) SETERRQ(1,0,"Row too large");
170639f9d9dSBarry Smith #endif
17157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
17257b952d6SSatish Balay       row = im[i] - rstart_orig;
17357b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
17457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
17557b952d6SSatish Balay           col = in[j] - cstart_orig;
17657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
177eada6651SSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value);
17880c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
17957b952d6SSatish Balay         }
180639f9d9dSBarry Smith #if defined(PETSC_BOPT_g)
181e3372554SBarry Smith         else if (in[j] < 0) {SETERRQ(1,0,"Negative column");}
182e3372554SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(1,0,"Col too large");}
183639f9d9dSBarry Smith #endif
18457b952d6SSatish Balay         else {
18557b952d6SSatish Balay           if (mat->was_assembled) {
186905e6a2fSBarry Smith             if (!baij->colmap) {
187905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
188905e6a2fSBarry Smith             }
189905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
19057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
19157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
19257b952d6SSatish Balay               col =  in[j];
19357b952d6SSatish Balay             }
19457b952d6SSatish Balay           }
19557b952d6SSatish Balay           else col = in[j];
19657b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
197639f9d9dSBarry Smith           ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr);
19857b952d6SSatish Balay         }
19957b952d6SSatish Balay       }
20057b952d6SSatish Balay     }
20157b952d6SSatish Balay     else {
20290f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
20357b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
20457b952d6SSatish Balay       }
20557b952d6SSatish Balay       else {
20690f02eecSBarry Smith         if (!baij->donotstash) {
20757b952d6SSatish Balay           row = im[i];
20857b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
20957b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
21057b952d6SSatish Balay           }
21157b952d6SSatish Balay         }
21257b952d6SSatish Balay       }
21357b952d6SSatish Balay     }
21490f02eecSBarry Smith   }
21557b952d6SSatish Balay   return 0;
21657b952d6SSatish Balay }
21757b952d6SSatish Balay 
2185615d1e5SSatish Balay #undef __FUNC__
2195615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
220d6de1c52SSatish Balay static int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
221d6de1c52SSatish Balay {
222d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
223d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
224d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
225d6de1c52SSatish Balay 
226d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
227e3372554SBarry Smith     if (idxm[i] < 0) SETERRQ(1,0,"Negative row");
228e3372554SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(1,0,"Row too large");
229d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
230d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
231d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
232e3372554SBarry Smith         if (idxn[j] < 0) SETERRQ(1,0,"Negative column");
233e3372554SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(1,0,"Col too large");
234d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
235d6de1c52SSatish Balay           col = idxn[j] - bscstart;
236d6de1c52SSatish Balay           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
237d6de1c52SSatish Balay         }
238d6de1c52SSatish Balay         else {
239905e6a2fSBarry Smith           if (!baij->colmap) {
240905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
241905e6a2fSBarry Smith           }
242e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
243dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
244d9d09a02SSatish Balay           else {
245dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
246d6de1c52SSatish Balay             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
247d6de1c52SSatish Balay           }
248d6de1c52SSatish Balay         }
249d6de1c52SSatish Balay       }
250d9d09a02SSatish Balay     }
251d6de1c52SSatish Balay     else {
252e3372554SBarry Smith       SETERRQ(1,0,"Only local values currently supported");
253d6de1c52SSatish Balay     }
254d6de1c52SSatish Balay   }
255d6de1c52SSatish Balay   return 0;
256d6de1c52SSatish Balay }
257d6de1c52SSatish Balay 
2585615d1e5SSatish Balay #undef __FUNC__
2595615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
260d6de1c52SSatish Balay static int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
261d6de1c52SSatish Balay {
262d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
263d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
264acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
265d6de1c52SSatish Balay   double     sum = 0.0;
266d6de1c52SSatish Balay   Scalar     *v;
267d6de1c52SSatish Balay 
268d6de1c52SSatish Balay   if (baij->size == 1) {
269d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
270d6de1c52SSatish Balay   } else {
271d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
272d6de1c52SSatish Balay       v = amat->a;
273d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
274d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
275d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
276d6de1c52SSatish Balay #else
277d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
278d6de1c52SSatish Balay #endif
279d6de1c52SSatish Balay       }
280d6de1c52SSatish Balay       v = bmat->a;
281d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
282d6de1c52SSatish Balay #if defined(PETSC_COMPLEX)
283d6de1c52SSatish Balay         sum += real(conj(*v)*(*v)); v++;
284d6de1c52SSatish Balay #else
285d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
286d6de1c52SSatish Balay #endif
287d6de1c52SSatish Balay       }
288d6de1c52SSatish Balay       MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);
289d6de1c52SSatish Balay       *norm = sqrt(*norm);
290d6de1c52SSatish Balay     }
291acdf5bf4SSatish Balay     else
292e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
293d6de1c52SSatish Balay   }
294d6de1c52SSatish Balay   return 0;
295d6de1c52SSatish Balay }
29657b952d6SSatish Balay 
2975615d1e5SSatish Balay #undef __FUNC__
2985615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
29957b952d6SSatish Balay static int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
30057b952d6SSatish Balay {
30157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
30257b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
30357b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
30457b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
30557b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
30657b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
30757b952d6SSatish Balay   InsertMode  addv;
30857b952d6SSatish Balay   Scalar      *rvalues,*svalues;
30957b952d6SSatish Balay 
31057b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
31157b952d6SSatish Balay   MPI_Allreduce(&baij->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
31257b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
313e3372554SBarry Smith     SETERRQ(1,0,"Some processors inserted others added");
31457b952d6SSatish Balay   }
31557b952d6SSatish Balay   baij->insertmode = addv; /* in case this processor had no cache */
31657b952d6SSatish Balay 
31757b952d6SSatish Balay   /*  first count number of contributors to each processor */
31857b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
31957b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
32057b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
32157b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
32257b952d6SSatish Balay     idx = baij->stash.idx[i];
32357b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
32457b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
32557b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
32657b952d6SSatish Balay       }
32757b952d6SSatish Balay     }
32857b952d6SSatish Balay   }
32957b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
33057b952d6SSatish Balay 
33157b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
33257b952d6SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
33357b952d6SSatish Balay   MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);
33457b952d6SSatish Balay   nreceives = work[rank];
33557b952d6SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
33657b952d6SSatish Balay   nmax = work[rank];
33757b952d6SSatish Balay   PetscFree(work);
33857b952d6SSatish Balay 
33957b952d6SSatish Balay   /* post receives:
34057b952d6SSatish Balay        1) each message will consist of ordered pairs
34157b952d6SSatish Balay      (global index,value) we store the global index as a double
34257b952d6SSatish Balay      to simplify the message passing.
34357b952d6SSatish Balay        2) since we don't know how long each individual message is we
34457b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
34557b952d6SSatish Balay      this is a lot of wasted space.
34657b952d6SSatish Balay 
34757b952d6SSatish Balay 
34857b952d6SSatish Balay        This could be done better.
34957b952d6SSatish Balay   */
35057b952d6SSatish Balay   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));
35157b952d6SSatish Balay   CHKPTRQ(rvalues);
35257b952d6SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));
35357b952d6SSatish Balay   CHKPTRQ(recv_waits);
35457b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
35557b952d6SSatish Balay     MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
35657b952d6SSatish Balay               comm,recv_waits+i);
35757b952d6SSatish Balay   }
35857b952d6SSatish Balay 
35957b952d6SSatish Balay   /* do sends:
36057b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
36157b952d6SSatish Balay          the ith processor
36257b952d6SSatish Balay   */
36357b952d6SSatish Balay   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
36457b952d6SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
36557b952d6SSatish Balay   CHKPTRQ(send_waits);
36657b952d6SSatish Balay   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
36757b952d6SSatish Balay   starts[0] = 0;
36857b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
36957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
37057b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
37157b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
37257b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
37357b952d6SSatish Balay   }
37457b952d6SSatish Balay   PetscFree(owner);
37557b952d6SSatish Balay   starts[0] = 0;
37657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
37757b952d6SSatish Balay   count = 0;
37857b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
37957b952d6SSatish Balay     if (procs[i]) {
38057b952d6SSatish Balay       MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,
38157b952d6SSatish Balay                 comm,send_waits+count++);
38257b952d6SSatish Balay     }
38357b952d6SSatish Balay   }
38457b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
38557b952d6SSatish Balay 
38657b952d6SSatish Balay   /* Free cache space */
387d2dc9b81SLois Curfman McInnes   PLogInfo(mat,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
38857b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
38957b952d6SSatish Balay 
39057b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
39157b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
39257b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
39357b952d6SSatish Balay   baij->rmax       = nmax;
39457b952d6SSatish Balay 
39557b952d6SSatish Balay   return 0;
39657b952d6SSatish Balay }
39757b952d6SSatish Balay 
39857b952d6SSatish Balay 
3995615d1e5SSatish Balay #undef __FUNC__
4005615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
40157b952d6SSatish Balay static int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
40257b952d6SSatish Balay {
40357b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
40457b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
40557b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
40657b952d6SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
40757b952d6SSatish Balay   Scalar      *values,val;
40857b952d6SSatish Balay   InsertMode  addv = baij->insertmode;
40957b952d6SSatish Balay 
41057b952d6SSatish Balay   /*  wait on receives */
41157b952d6SSatish Balay   while (count) {
41257b952d6SSatish Balay     MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);
41357b952d6SSatish Balay     /* unpack receives into our local space */
41457b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
41557b952d6SSatish Balay     MPI_Get_count(&recv_status,MPIU_SCALAR,&n);
41657b952d6SSatish Balay     n = n/3;
41757b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
41857b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
41957b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
42057b952d6SSatish Balay       val = values[3*i+2];
42157b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
42257b952d6SSatish Balay         col -= baij->cstart*bs;
42357b952d6SSatish Balay         MatSetValues(baij->A,1,&row,1,&col,&val,addv);
42457b952d6SSatish Balay       }
42557b952d6SSatish Balay       else {
42657b952d6SSatish Balay         if (mat->was_assembled) {
427905e6a2fSBarry Smith           if (!baij->colmap) {
428905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
429905e6a2fSBarry Smith           }
430905e6a2fSBarry Smith           col = (baij->colmap[col/bs]-1)*bs + col%bs;
43157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
43257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
43357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
43457b952d6SSatish Balay           }
43557b952d6SSatish Balay         }
43657b952d6SSatish Balay         MatSetValues(baij->B,1,&row,1,&col,&val,addv);
43757b952d6SSatish Balay       }
43857b952d6SSatish Balay     }
43957b952d6SSatish Balay     count--;
44057b952d6SSatish Balay   }
44157b952d6SSatish Balay   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
44257b952d6SSatish Balay 
44357b952d6SSatish Balay   /* wait on sends */
44457b952d6SSatish Balay   if (baij->nsends) {
44557b952d6SSatish Balay     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));
44657b952d6SSatish Balay     CHKPTRQ(send_status);
44757b952d6SSatish Balay     MPI_Waitall(baij->nsends,baij->send_waits,send_status);
44857b952d6SSatish Balay     PetscFree(send_status);
44957b952d6SSatish Balay   }
45057b952d6SSatish Balay   PetscFree(baij->send_waits); PetscFree(baij->svalues);
45157b952d6SSatish Balay 
45257b952d6SSatish Balay   baij->insertmode = NOT_SET_VALUES;
45357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
45457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
45557b952d6SSatish Balay 
45657b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
45757b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
45857b952d6SSatish Balay   MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
45957b952d6SSatish Balay   if (mat->was_assembled && !other_disassembled) {
46057b952d6SSatish Balay     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
46157b952d6SSatish Balay   }
46257b952d6SSatish Balay 
4636d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
46457b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
46557b952d6SSatish Balay   }
46657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
46757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
46857b952d6SSatish Balay 
46957b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
47057b952d6SSatish Balay   return 0;
47157b952d6SSatish Balay }
47257b952d6SSatish Balay 
4735615d1e5SSatish Balay #undef __FUNC__
4745615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
47557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
47657b952d6SSatish Balay {
47757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
47857b952d6SSatish Balay   int          ierr;
47957b952d6SSatish Balay 
48057b952d6SSatish Balay   if (baij->size == 1) {
48157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
48257b952d6SSatish Balay   }
483e3372554SBarry Smith   else SETERRQ(1,0,"Only uniprocessor output supported");
48457b952d6SSatish Balay   return 0;
48557b952d6SSatish Balay }
48657b952d6SSatish Balay 
4875615d1e5SSatish Balay #undef __FUNC__
4885615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
48957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
49057b952d6SSatish Balay {
49157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
492cee3aa6bSSatish Balay   int          ierr, format,rank,bs = baij->bs;
49357b952d6SSatish Balay   FILE         *fd;
49457b952d6SSatish Balay   ViewerType   vtype;
49557b952d6SSatish Balay 
49657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
49757b952d6SSatish Balay   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
49857b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
499639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
5004e220ebcSLois Curfman McInnes       MatInfo info;
50157b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
50257b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
5034e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
50457b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
50557b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
5064e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
5074e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
5084e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
5094e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
5104e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
5114e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
51257b952d6SSatish Balay       fflush(fd);
51357b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
51457b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
51557b952d6SSatish Balay       return 0;
51657b952d6SSatish Balay     }
517639f9d9dSBarry Smith     else if (format == VIEWER_FORMAT_ASCII_INFO) {
518bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
51957b952d6SSatish Balay       return 0;
52057b952d6SSatish Balay     }
52157b952d6SSatish Balay   }
52257b952d6SSatish Balay 
52357b952d6SSatish Balay   if (vtype == DRAW_VIEWER) {
52457b952d6SSatish Balay     Draw       draw;
52557b952d6SSatish Balay     PetscTruth isnull;
52657b952d6SSatish Balay     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
52757b952d6SSatish Balay     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
52857b952d6SSatish Balay   }
52957b952d6SSatish Balay 
53057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER) {
53157b952d6SSatish Balay     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
53257b952d6SSatish Balay     PetscSequentialPhaseBegin(mat->comm,1);
53357b952d6SSatish Balay     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
53457b952d6SSatish Balay            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
53557b952d6SSatish Balay             baij->cstart*bs,baij->cend*bs);
53657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
53757b952d6SSatish Balay     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
53857b952d6SSatish Balay     fflush(fd);
53957b952d6SSatish Balay     PetscSequentialPhaseEnd(mat->comm,1);
54057b952d6SSatish Balay   }
54157b952d6SSatish Balay   else {
54257b952d6SSatish Balay     int size = baij->size;
54357b952d6SSatish Balay     rank = baij->rank;
54457b952d6SSatish Balay     if (size == 1) {
54557b952d6SSatish Balay       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
54657b952d6SSatish Balay     }
54757b952d6SSatish Balay     else {
54857b952d6SSatish Balay       /* assemble the entire matrix onto first processor. */
54957b952d6SSatish Balay       Mat         A;
55057b952d6SSatish Balay       Mat_SeqBAIJ *Aloc;
55157b952d6SSatish Balay       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
55257b952d6SSatish Balay       int         mbs=baij->mbs;
55357b952d6SSatish Balay       Scalar      *a;
55457b952d6SSatish Balay 
55557b952d6SSatish Balay       if (!rank) {
556cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
55757b952d6SSatish Balay         CHKERRQ(ierr);
55857b952d6SSatish Balay       }
55957b952d6SSatish Balay       else {
560cee3aa6bSSatish Balay         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
56157b952d6SSatish Balay         CHKERRQ(ierr);
56257b952d6SSatish Balay       }
56357b952d6SSatish Balay       PLogObjectParent(mat,A);
56457b952d6SSatish Balay 
56557b952d6SSatish Balay       /* copy over the A part */
56657b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->A->data;
56757b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
56857b952d6SSatish Balay       row = baij->rstart;
56957b952d6SSatish Balay       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
57057b952d6SSatish Balay 
57157b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
57257b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
57357b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
57457b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
57557b952d6SSatish Balay           col = (baij->cstart+aj[j])*bs;
57657b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
577cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
578cee3aa6bSSatish Balay             col++; a += bs;
57957b952d6SSatish Balay           }
58057b952d6SSatish Balay         }
58157b952d6SSatish Balay       }
58257b952d6SSatish Balay       /* copy over the B part */
58357b952d6SSatish Balay       Aloc = (Mat_SeqBAIJ*) baij->B->data;
58457b952d6SSatish Balay       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
58557b952d6SSatish Balay       row = baij->rstart*bs;
58657b952d6SSatish Balay       for ( i=0; i<mbs; i++ ) {
58757b952d6SSatish Balay         rvals[0] = bs*(baij->rstart + i);
58857b952d6SSatish Balay         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
58957b952d6SSatish Balay         for ( j=ai[i]; j<ai[i+1]; j++ ) {
59057b952d6SSatish Balay           col = baij->garray[aj[j]]*bs;
59157b952d6SSatish Balay           for (k=0; k<bs; k++ ) {
592cee3aa6bSSatish Balay             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
593cee3aa6bSSatish Balay             col++; a += bs;
59457b952d6SSatish Balay           }
59557b952d6SSatish Balay         }
59657b952d6SSatish Balay       }
59757b952d6SSatish Balay       PetscFree(rvals);
5986d4a8577SBarry Smith       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
5996d4a8577SBarry Smith       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
60057b952d6SSatish Balay       if (!rank) {
60157b952d6SSatish Balay         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
60257b952d6SSatish Balay       }
60357b952d6SSatish Balay       ierr = MatDestroy(A); CHKERRQ(ierr);
60457b952d6SSatish Balay     }
60557b952d6SSatish Balay   }
60657b952d6SSatish Balay   return 0;
60757b952d6SSatish Balay }
60857b952d6SSatish Balay 
60957b952d6SSatish Balay 
61057b952d6SSatish Balay 
6115615d1e5SSatish Balay #undef __FUNC__
6125615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
61357b952d6SSatish Balay static int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
61457b952d6SSatish Balay {
61557b952d6SSatish Balay   Mat         mat = (Mat) obj;
61657b952d6SSatish Balay   int         ierr;
61757b952d6SSatish Balay   ViewerType  vtype;
61857b952d6SSatish Balay 
61957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
62057b952d6SSatish Balay   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
62157b952d6SSatish Balay       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
62257b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
62357b952d6SSatish Balay   }
62457b952d6SSatish Balay   else if (vtype == BINARY_FILE_VIEWER) {
62557b952d6SSatish Balay     return MatView_MPIBAIJ_Binary(mat,viewer);
62657b952d6SSatish Balay   }
62757b952d6SSatish Balay   return 0;
62857b952d6SSatish Balay }
62957b952d6SSatish Balay 
6305615d1e5SSatish Balay #undef __FUNC__
6315615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
63279bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
63379bdfe76SSatish Balay {
63479bdfe76SSatish Balay   Mat         mat = (Mat) obj;
63579bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
63679bdfe76SSatish Balay   int         ierr;
63779bdfe76SSatish Balay 
63879bdfe76SSatish Balay #if defined(PETSC_LOG)
63979bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
64079bdfe76SSatish Balay #endif
64179bdfe76SSatish Balay 
64279bdfe76SSatish Balay   PetscFree(baij->rowners);
64379bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
64479bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
64579bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
64679bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
64779bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
64879bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
64979bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
65079bdfe76SSatish Balay   PetscFree(baij);
65190f02eecSBarry Smith   if (mat->mapping) {
65290f02eecSBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
65390f02eecSBarry Smith   }
65479bdfe76SSatish Balay   PLogObjectDestroy(mat);
65579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
65679bdfe76SSatish Balay   return 0;
65779bdfe76SSatish Balay }
65879bdfe76SSatish Balay 
6595615d1e5SSatish Balay #undef __FUNC__
6605615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
661cee3aa6bSSatish Balay static int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
662cee3aa6bSSatish Balay {
663cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
66447b4a8eaSLois Curfman McInnes   int         ierr, nt;
665cee3aa6bSSatish Balay 
666c16cb8f2SBarry Smith   VecGetLocalSize_Fast(xx,nt);
66747b4a8eaSLois Curfman McInnes   if (nt != a->n) {
668e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and xx");
66947b4a8eaSLois Curfman McInnes   }
670c16cb8f2SBarry Smith   VecGetLocalSize_Fast(yy,nt);
67147b4a8eaSLois Curfman McInnes   if (nt != a->m) {
672e3372554SBarry Smith     SETERRQ(1,0,"Incompatible parition of A and yy");
67347b4a8eaSLois Curfman McInnes   }
67443a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
675cee3aa6bSSatish Balay   ierr = (*a->A->ops.mult)(a->A,xx,yy); CHKERRQ(ierr);
67643a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
677cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
67843a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
679cee3aa6bSSatish Balay   return 0;
680cee3aa6bSSatish Balay }
681cee3aa6bSSatish Balay 
6825615d1e5SSatish Balay #undef __FUNC__
6835615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
684cee3aa6bSSatish Balay static int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
685cee3aa6bSSatish Balay {
686cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
687cee3aa6bSSatish Balay   int        ierr;
68843a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
689cee3aa6bSSatish Balay   ierr = (*a->A->ops.multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
69043a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
691cee3aa6bSSatish Balay   ierr = (*a->B->ops.multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
692cee3aa6bSSatish Balay   return 0;
693cee3aa6bSSatish Balay }
694cee3aa6bSSatish Balay 
6955615d1e5SSatish Balay #undef __FUNC__
6965615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
697cee3aa6bSSatish Balay static int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
698cee3aa6bSSatish Balay {
699cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
700cee3aa6bSSatish Balay   int        ierr;
701cee3aa6bSSatish Balay 
702cee3aa6bSSatish Balay   /* do nondiagonal part */
703cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
704cee3aa6bSSatish Balay   /* send it on its way */
705537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
706cee3aa6bSSatish Balay   /* do local part */
707cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtrans)(a->A,xx,yy); CHKERRQ(ierr);
708cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
709cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
710cee3aa6bSSatish Balay   /* but is not perhaps always true. */
711639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
712cee3aa6bSSatish Balay   return 0;
713cee3aa6bSSatish Balay }
714cee3aa6bSSatish Balay 
7155615d1e5SSatish Balay #undef __FUNC__
7165615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
717cee3aa6bSSatish Balay static int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
718cee3aa6bSSatish Balay {
719cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
720cee3aa6bSSatish Balay   int        ierr;
721cee3aa6bSSatish Balay 
722cee3aa6bSSatish Balay   /* do nondiagonal part */
723cee3aa6bSSatish Balay   ierr = (*a->B->ops.multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
724cee3aa6bSSatish Balay   /* send it on its way */
725537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
726cee3aa6bSSatish Balay   /* do local part */
727cee3aa6bSSatish Balay   ierr = (*a->A->ops.multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
728cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
729cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
730cee3aa6bSSatish Balay   /* but is not perhaps always true. */
731537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
732cee3aa6bSSatish Balay   return 0;
733cee3aa6bSSatish Balay }
734cee3aa6bSSatish Balay 
735cee3aa6bSSatish Balay /*
736cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
737cee3aa6bSSatish Balay    diagonal block
738cee3aa6bSSatish Balay */
7395615d1e5SSatish Balay #undef __FUNC__
7405615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
741cee3aa6bSSatish Balay static int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
742cee3aa6bSSatish Balay {
743cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
744cee3aa6bSSatish Balay   if (a->M != a->N)
745e3372554SBarry Smith     SETERRQ(1,0,"Supports only square matrix where A->A is diag block");
746cee3aa6bSSatish Balay   return MatGetDiagonal(a->A,v);
747cee3aa6bSSatish Balay }
748cee3aa6bSSatish Balay 
7495615d1e5SSatish Balay #undef __FUNC__
7505615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
751cee3aa6bSSatish Balay static int MatScale_MPIBAIJ(Scalar *aa,Mat A)
752cee3aa6bSSatish Balay {
753cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
754cee3aa6bSSatish Balay   int        ierr;
755cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
756cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
757cee3aa6bSSatish Balay   return 0;
758cee3aa6bSSatish Balay }
759026e39d0SSatish Balay 
7605615d1e5SSatish Balay #undef __FUNC__
7615615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
76257b952d6SSatish Balay static int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
76357b952d6SSatish Balay {
76457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
76557b952d6SSatish Balay   *m = mat->M; *n = mat->N;
76657b952d6SSatish Balay   return 0;
76757b952d6SSatish Balay }
76857b952d6SSatish Balay 
7695615d1e5SSatish Balay #undef __FUNC__
7705615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
77157b952d6SSatish Balay static int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
77257b952d6SSatish Balay {
77357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
77457b952d6SSatish Balay   *m = mat->m; *n = mat->N;
77557b952d6SSatish Balay   return 0;
77657b952d6SSatish Balay }
77757b952d6SSatish Balay 
7785615d1e5SSatish Balay #undef __FUNC__
7795615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
78057b952d6SSatish Balay static int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
78157b952d6SSatish Balay {
78257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
78357b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
78457b952d6SSatish Balay   return 0;
78557b952d6SSatish Balay }
78657b952d6SSatish Balay 
787acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
788acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
789acdf5bf4SSatish Balay 
7905615d1e5SSatish Balay #undef __FUNC__
7915615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
792acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
793acdf5bf4SSatish Balay {
794acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
795acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
796acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
797d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
798d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
799acdf5bf4SSatish Balay 
800e3372554SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(1,0,"Already active");
801acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
802acdf5bf4SSatish Balay 
803acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
804acdf5bf4SSatish Balay     /*
805acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
806acdf5bf4SSatish Balay     */
807acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
808bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
809bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
810acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
811acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
812acdf5bf4SSatish Balay     }
813acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
814acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
815acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
816acdf5bf4SSatish Balay   }
817acdf5bf4SSatish Balay 
818acdf5bf4SSatish Balay 
819e3372554SBarry Smith   if (row < brstart || row >= brend) SETERRQ(1,0,"Only local rows")
820d9d09a02SSatish Balay   lrow = row - brstart;
821acdf5bf4SSatish Balay 
822acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
823acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
824acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
825acdf5bf4SSatish Balay   ierr = (*mat->A->ops.getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
826acdf5bf4SSatish Balay   ierr = (*mat->B->ops.getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
827acdf5bf4SSatish Balay   nztot = nzA + nzB;
828acdf5bf4SSatish Balay 
829acdf5bf4SSatish Balay   cmap  = mat->garray;
830acdf5bf4SSatish Balay   if (v  || idx) {
831acdf5bf4SSatish Balay     if (nztot) {
832acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
833acdf5bf4SSatish Balay       int imark = -1;
834acdf5bf4SSatish Balay       if (v) {
835acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
836acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
837d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
838acdf5bf4SSatish Balay           else break;
839acdf5bf4SSatish Balay         }
840acdf5bf4SSatish Balay         imark = i;
841acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
842acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
843acdf5bf4SSatish Balay       }
844acdf5bf4SSatish Balay       if (idx) {
845acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
846acdf5bf4SSatish Balay         if (imark > -1) {
847acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
848bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
849acdf5bf4SSatish Balay           }
850acdf5bf4SSatish Balay         } else {
851acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
852d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
853d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
854acdf5bf4SSatish Balay             else break;
855acdf5bf4SSatish Balay           }
856acdf5bf4SSatish Balay           imark = i;
857acdf5bf4SSatish Balay         }
858d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
859d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
860acdf5bf4SSatish Balay       }
861acdf5bf4SSatish Balay     }
862d212a18eSSatish Balay     else {
863d212a18eSSatish Balay       if (idx) *idx = 0;
864d212a18eSSatish Balay       if (v)   *v   = 0;
865d212a18eSSatish Balay     }
866acdf5bf4SSatish Balay   }
867acdf5bf4SSatish Balay   *nz = nztot;
868acdf5bf4SSatish Balay   ierr = (*mat->A->ops.restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
869acdf5bf4SSatish Balay   ierr = (*mat->B->ops.restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
870acdf5bf4SSatish Balay   return 0;
871acdf5bf4SSatish Balay }
872acdf5bf4SSatish Balay 
8735615d1e5SSatish Balay #undef __FUNC__
8745615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
875acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
876acdf5bf4SSatish Balay {
877acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
878acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
879e3372554SBarry Smith     SETERRQ(1,0,"MatGetRow not called");
880acdf5bf4SSatish Balay   }
881acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
882acdf5bf4SSatish Balay   return 0;
883acdf5bf4SSatish Balay }
884acdf5bf4SSatish Balay 
8855615d1e5SSatish Balay #undef __FUNC__
8865615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
8875a838052SSatish Balay static int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
8885a838052SSatish Balay {
8895a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
8905a838052SSatish Balay   *bs = baij->bs;
8915a838052SSatish Balay   return 0;
8925a838052SSatish Balay }
8935a838052SSatish Balay 
8945615d1e5SSatish Balay #undef __FUNC__
8955615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
89658667388SSatish Balay static int MatZeroEntries_MPIBAIJ(Mat A)
89758667388SSatish Balay {
89858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
89958667388SSatish Balay   int         ierr;
90058667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
90158667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
90258667388SSatish Balay   return 0;
90358667388SSatish Balay }
9040ac07820SSatish Balay 
9055615d1e5SSatish Balay #undef __FUNC__
9065615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
9074e220ebcSLois Curfman McInnes static int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
9080ac07820SSatish Balay {
9094e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
9104e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
9117d57db60SLois Curfman McInnes   int         ierr;
9127d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
9130ac07820SSatish Balay 
9144e220ebcSLois Curfman McInnes   info->rows_global    = (double)a->M;
9154e220ebcSLois Curfman McInnes   info->columns_global = (double)a->N;
9164e220ebcSLois Curfman McInnes   info->rows_local     = (double)a->m;
9174e220ebcSLois Curfman McInnes   info->columns_local  = (double)a->N;
9184e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
9194e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
9204e220ebcSLois Curfman McInnes   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
9214e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
9224e220ebcSLois Curfman McInnes   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
9230ac07820SSatish Balay   if (flag == MAT_LOCAL) {
9244e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
9254e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
9264e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
9274e220ebcSLois Curfman McInnes     info->memory       = isend[3];
9284e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
9290ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
9300ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_MAX,matin->comm);
9314e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9324e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9334e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9344e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9354e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9360ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
9370ac07820SSatish Balay     MPI_Allreduce(isend,irecv,3,MPI_INT,MPI_SUM,matin->comm);
9384e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
9394e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
9404e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
9414e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
9424e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
9430ac07820SSatish Balay   }
9444e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
9454e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
9464e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
9470ac07820SSatish Balay   return 0;
9480ac07820SSatish Balay }
9490ac07820SSatish Balay 
9505615d1e5SSatish Balay #undef __FUNC__
9515615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
95258667388SSatish Balay static int MatSetOption_MPIBAIJ(Mat A,MatOption op)
95358667388SSatish Balay {
95458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
95558667388SSatish Balay 
95658667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
95758667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
9586da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
959b1fbbac0SLois Curfman McInnes       op == MAT_COLUMNS_SORTED) {
960b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
961b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
962b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
963aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
96458667388SSatish Balay         MatSetOption(a->A,op);
96558667388SSatish Balay         MatSetOption(a->B,op);
966b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
9676da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
96858667388SSatish Balay              op == MAT_SYMMETRIC ||
96958667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
97058667388SSatish Balay              op == MAT_YES_NEW_DIAGONALS)
97158667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
97258667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
97358667388SSatish Balay     a->roworiented = 0;
97458667388SSatish Balay     MatSetOption(a->A,op);
97558667388SSatish Balay     MatSetOption(a->B,op);
97690f02eecSBarry Smith   } else if (op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) {
97790f02eecSBarry Smith     a->donotstash = 1;
97890f02eecSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS)
979e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");}
98058667388SSatish Balay   else
981e3372554SBarry Smith     {SETERRQ(PETSC_ERR_SUP,0,"unknown option");}
98258667388SSatish Balay   return 0;
98358667388SSatish Balay }
98458667388SSatish Balay 
9855615d1e5SSatish Balay #undef __FUNC__
9865615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
9870ac07820SSatish Balay static int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
9880ac07820SSatish Balay {
9890ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
9900ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
9910ac07820SSatish Balay   Mat        B;
9920ac07820SSatish Balay   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
9930ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
9940ac07820SSatish Balay   Scalar     *a;
9950ac07820SSatish Balay 
9960ac07820SSatish Balay   if (matout == PETSC_NULL && M != N)
997e3372554SBarry Smith     SETERRQ(1,0,"Square matrix only for in-place");
9980ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
9990ac07820SSatish Balay   CHKERRQ(ierr);
10000ac07820SSatish Balay 
10010ac07820SSatish Balay   /* copy over the A part */
10020ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
10030ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10040ac07820SSatish Balay   row = baij->rstart;
10050ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
10060ac07820SSatish Balay 
10070ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10080ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10090ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10100ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10110ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
10120ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10130ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10140ac07820SSatish Balay         col++; a += bs;
10150ac07820SSatish Balay       }
10160ac07820SSatish Balay     }
10170ac07820SSatish Balay   }
10180ac07820SSatish Balay   /* copy over the B part */
10190ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
10200ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
10210ac07820SSatish Balay   row = baij->rstart*bs;
10220ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
10230ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
10240ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
10250ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
10260ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
10270ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
10280ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
10290ac07820SSatish Balay         col++; a += bs;
10300ac07820SSatish Balay       }
10310ac07820SSatish Balay     }
10320ac07820SSatish Balay   }
10330ac07820SSatish Balay   PetscFree(rvals);
10340ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10350ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10360ac07820SSatish Balay 
10370ac07820SSatish Balay   if (matout != PETSC_NULL) {
10380ac07820SSatish Balay     *matout = B;
10390ac07820SSatish Balay   } else {
10400ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
10410ac07820SSatish Balay     PetscFree(baij->rowners);
10420ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
10430ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
10440ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
10450ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
10460ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
10470ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
10480ac07820SSatish Balay     PetscFree(baij);
10490ac07820SSatish Balay     PetscMemcpy(A,B,sizeof(struct _Mat));
10500ac07820SSatish Balay     PetscHeaderDestroy(B);
10510ac07820SSatish Balay   }
10520ac07820SSatish Balay   return 0;
10530ac07820SSatish Balay }
10540e95ebc0SSatish Balay 
10555615d1e5SSatish Balay #undef __FUNC__
10565615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
10570e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
10580e95ebc0SSatish Balay {
10590e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
10600e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
10610e95ebc0SSatish Balay   int ierr,s1,s2,s3;
10620e95ebc0SSatish Balay 
10630e95ebc0SSatish Balay   if (ll)  {
10640e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
10650e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1066e3372554SBarry Smith     if (s1!=s2) SETERRQ(1,0,"non-conforming local sizes");
10670e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
10680e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
10690e95ebc0SSatish Balay   }
1070e3372554SBarry Smith   if (rr) SETERRQ(1,0,"not supported for right vector");
10710e95ebc0SSatish Balay   return 0;
10720e95ebc0SSatish Balay }
10730e95ebc0SSatish Balay 
10740ac07820SSatish Balay /* the code does not do the diagonal entries correctly unless the
10750ac07820SSatish Balay    matrix is square and the column and row owerships are identical.
10760ac07820SSatish Balay    This is a BUG. The only way to fix it seems to be to access
10770ac07820SSatish Balay    baij->A and baij->B directly and not through the MatZeroRows()
10780ac07820SSatish Balay    routine.
10790ac07820SSatish Balay */
10805615d1e5SSatish Balay #undef __FUNC__
10815615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
10820ac07820SSatish Balay static int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
10830ac07820SSatish Balay {
10840ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
10850ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
10860ac07820SSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work;
10870ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
10880ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
10890ac07820SSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs;
10900ac07820SSatish Balay   MPI_Comm       comm = A->comm;
10910ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
10920ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
10930ac07820SSatish Balay   IS             istmp;
10940ac07820SSatish Balay 
10950ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
10960ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
10970ac07820SSatish Balay 
10980ac07820SSatish Balay   /*  first count number of contributors to each processor */
10990ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
11000ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
11010ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
11020ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11030ac07820SSatish Balay     idx = rows[i];
11040ac07820SSatish Balay     found = 0;
11050ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
11060ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
11070ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
11080ac07820SSatish Balay       }
11090ac07820SSatish Balay     }
1110e3372554SBarry Smith     if (!found) SETERRQ(1,0,"Index out of range");
11110ac07820SSatish Balay   }
11120ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
11130ac07820SSatish Balay 
11140ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
11150ac07820SSatish Balay   work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
11160ac07820SSatish Balay   MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);
11170ac07820SSatish Balay   nrecvs = work[rank];
11180ac07820SSatish Balay   MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);
11190ac07820SSatish Balay   nmax = work[rank];
11200ac07820SSatish Balay   PetscFree(work);
11210ac07820SSatish Balay 
11220ac07820SSatish Balay   /* post receives:   */
11230ac07820SSatish Balay   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); /*see note */
11240ac07820SSatish Balay   CHKPTRQ(rvalues);
11250ac07820SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));
11260ac07820SSatish Balay   CHKPTRQ(recv_waits);
11270ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11280ac07820SSatish Balay     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
11290ac07820SSatish Balay   }
11300ac07820SSatish Balay 
11310ac07820SSatish Balay   /* do sends:
11320ac07820SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
11330ac07820SSatish Balay          the ith processor
11340ac07820SSatish Balay   */
11350ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
11360ac07820SSatish Balay   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));
11370ac07820SSatish Balay   CHKPTRQ(send_waits);
11380ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
11390ac07820SSatish Balay   starts[0] = 0;
11400ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11410ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
11420ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
11430ac07820SSatish Balay   }
11440ac07820SSatish Balay   ISRestoreIndices(is,&rows);
11450ac07820SSatish Balay 
11460ac07820SSatish Balay   starts[0] = 0;
11470ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
11480ac07820SSatish Balay   count = 0;
11490ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
11500ac07820SSatish Balay     if (procs[i]) {
11510ac07820SSatish Balay       MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);
11520ac07820SSatish Balay     }
11530ac07820SSatish Balay   }
11540ac07820SSatish Balay   PetscFree(starts);
11550ac07820SSatish Balay 
11560ac07820SSatish Balay   base = owners[rank]*bs;
11570ac07820SSatish Balay 
11580ac07820SSatish Balay   /*  wait on receives */
11590ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
11600ac07820SSatish Balay   source = lens + nrecvs;
11610ac07820SSatish Balay   count  = nrecvs; slen = 0;
11620ac07820SSatish Balay   while (count) {
11630ac07820SSatish Balay     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
11640ac07820SSatish Balay     /* unpack receives into our local space */
11650ac07820SSatish Balay     MPI_Get_count(&recv_status,MPI_INT,&n);
11660ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
11670ac07820SSatish Balay     lens[imdex]  = n;
11680ac07820SSatish Balay     slen += n;
11690ac07820SSatish Balay     count--;
11700ac07820SSatish Balay   }
11710ac07820SSatish Balay   PetscFree(recv_waits);
11720ac07820SSatish Balay 
11730ac07820SSatish Balay   /* move the data into the send scatter */
11740ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
11750ac07820SSatish Balay   count = 0;
11760ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
11770ac07820SSatish Balay     values = rvalues + i*nmax;
11780ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
11790ac07820SSatish Balay       lrows[count++] = values[j] - base;
11800ac07820SSatish Balay     }
11810ac07820SSatish Balay   }
11820ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
11830ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
11840ac07820SSatish Balay 
11850ac07820SSatish Balay   /* actually zap the local rows */
1186537820f0SBarry Smith   ierr = ISCreateGeneral(MPI_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
11870ac07820SSatish Balay   PLogObjectParent(A,istmp);
11880ac07820SSatish Balay   PetscFree(lrows);
11890ac07820SSatish Balay   ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
11900ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
11910ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
11920ac07820SSatish Balay 
11930ac07820SSatish Balay   /* wait on sends */
11940ac07820SSatish Balay   if (nsends) {
11950ac07820SSatish Balay     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));
11960ac07820SSatish Balay     CHKPTRQ(send_status);
11970ac07820SSatish Balay     MPI_Waitall(nsends,send_waits,send_status);
11980ac07820SSatish Balay     PetscFree(send_status);
11990ac07820SSatish Balay   }
12000ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
12010ac07820SSatish Balay 
12020ac07820SSatish Balay   return 0;
12030ac07820SSatish Balay }
1204ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
12055615d1e5SSatish Balay #undef __FUNC__
12065615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1207ba4ca20aSSatish Balay static int MatPrintHelp_MPIBAIJ(Mat A)
1208ba4ca20aSSatish Balay {
1209ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1210ba4ca20aSSatish Balay 
1211ba4ca20aSSatish Balay   if (!a->rank) return MatPrintHelp_SeqBAIJ(a->A);
1212ba4ca20aSSatish Balay   else return 0;
1213ba4ca20aSSatish Balay }
12140ac07820SSatish Balay 
12155615d1e5SSatish Balay #undef __FUNC__
12165615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1217bb5a7306SBarry Smith static int MatSetUnfactored_MPIBAIJ(Mat A)
1218bb5a7306SBarry Smith {
1219bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1220bb5a7306SBarry Smith   int         ierr;
1221bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1222bb5a7306SBarry Smith   return 0;
1223bb5a7306SBarry Smith }
1224bb5a7306SBarry Smith 
12250ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
12260ac07820SSatish Balay 
122779bdfe76SSatish Balay /* -------------------------------------------------------------------*/
122879bdfe76SSatish Balay static struct _MatOps MatOps = {
1229bd16c2feSSatish Balay   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
12304c50302cSBarry Smith   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
12314c50302cSBarry Smith   0,0,0,0,
12320ac07820SSatish Balay   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
12330e95ebc0SSatish Balay   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
123458667388SSatish Balay   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
12354c50302cSBarry Smith   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
12364c50302cSBarry Smith   0,0,0,MatGetSize_MPIBAIJ,
12374c50302cSBarry Smith   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1238905e6a2fSBarry Smith   0,0,0,MatConvertSameType_MPIBAIJ,0,0,
1239d212a18eSSatish Balay   0,0,0,MatGetSubMatrices_MPIBAIJ,
1240ba4ca20aSSatish Balay   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1241bb5a7306SBarry Smith   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1242bb5a7306SBarry Smith   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ};
124379bdfe76SSatish Balay 
124479bdfe76SSatish Balay 
12455615d1e5SSatish Balay #undef __FUNC__
12465615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
124779bdfe76SSatish Balay /*@C
124879bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
124979bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
125079bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
125179bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
125279bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
125379bdfe76SSatish Balay 
125479bdfe76SSatish Balay    Input Parameters:
125579bdfe76SSatish Balay .  comm - MPI communicator
125679bdfe76SSatish Balay .  bs   - size of blockk
125779bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
125892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
125992e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
126092e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
126192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
126292e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
126379bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
126492e8d321SLois Curfman McInnes .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
126579bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
126679bdfe76SSatish Balay            submatrix  (same for all local rows)
126792e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
126892e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
126992e8d321SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if
127092e8d321SLois Curfman McInnes            it is zero.
127192e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
127279bdfe76SSatish Balay            submatrix (same for all local rows).
127392e8d321SLois Curfman McInnes .  o_nzz - array containing the number of nonzeros in the various block rows of the
127492e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
127592e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
127679bdfe76SSatish Balay 
127779bdfe76SSatish Balay    Output Parameter:
127879bdfe76SSatish Balay .  A - the matrix
127979bdfe76SSatish Balay 
128079bdfe76SSatish Balay    Notes:
128179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
128279bdfe76SSatish Balay    (possibly both).
128379bdfe76SSatish Balay 
128479bdfe76SSatish Balay    Storage Information:
128579bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
128679bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
128779bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
128879bdfe76SSatish Balay    local matrix (a rectangular submatrix).
128979bdfe76SSatish Balay 
129079bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
129179bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
129279bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
129379bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
129479bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
129579bdfe76SSatish Balay 
129679bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
129779bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
129879bdfe76SSatish Balay 
129979bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
130079bdfe76SSatish Balay $         -------------------
130179bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
130279bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
130379bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
130479bdfe76SSatish Balay $         -------------------
130579bdfe76SSatish Balay $
130679bdfe76SSatish Balay 
130779bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
130879bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
130979bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
131057b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
131179bdfe76SSatish Balay 
131279bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
131379bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
131479bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
131592e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
131692e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
13176da5968aSLois Curfman McInnes    matrices.
131879bdfe76SSatish Balay 
131992e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
132079bdfe76SSatish Balay 
132179bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
132279bdfe76SSatish Balay @*/
132379bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
132479bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
132579bdfe76SSatish Balay {
132679bdfe76SSatish Balay   Mat          B;
132779bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
13284c50302cSBarry Smith   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
132979bdfe76SSatish Balay 
1330e3372554SBarry Smith   if (bs < 1) SETERRQ(1,0,"invalid block size specified");
133179bdfe76SSatish Balay   *A = 0;
133279bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
133379bdfe76SSatish Balay   PLogObjectCreate(B);
133479bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
133579bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
133679bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
13374c50302cSBarry Smith   MPI_Comm_size(comm,&size);
13384c50302cSBarry Smith   if (size == 1) {
13394c50302cSBarry Smith     B->ops.getrowij          = MatGetRowIJ_MPIBAIJ;
13404c50302cSBarry Smith     B->ops.restorerowij      = MatRestoreRowIJ_MPIBAIJ;
13414c50302cSBarry Smith     B->ops.lufactorsymbolic  = MatLUFactorSymbolic_MPIBAIJ;
13424c50302cSBarry Smith     B->ops.lufactornumeric   = MatLUFactorNumeric_MPIBAIJ;
13434c50302cSBarry Smith     B->ops.lufactor          = MatLUFactor_MPIBAIJ;
13444c50302cSBarry Smith     B->ops.solve             = MatSolve_MPIBAIJ;
13454c50302cSBarry Smith     B->ops.solveadd          = MatSolveAdd_MPIBAIJ;
13464c50302cSBarry Smith     B->ops.solvetrans        = MatSolveTrans_MPIBAIJ;
13474c50302cSBarry Smith     B->ops.solvetransadd     = MatSolveTransAdd_MPIBAIJ;
13484c50302cSBarry Smith     B->ops.ilufactorsymbolic = MatILUFactorSymbolic_MPIBAIJ;
13494c50302cSBarry Smith   }
13504c50302cSBarry Smith 
135179bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
135279bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
135390f02eecSBarry Smith   B->mapping    = 0;
135479bdfe76SSatish Balay   B->factor     = 0;
135579bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
135679bdfe76SSatish Balay 
135779bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
135879bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
135979bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
136079bdfe76SSatish Balay 
136179bdfe76SSatish Balay   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
1362e3372554SBarry Smith     SETERRQ(1,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1363e3372554SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) SETERRQ(1,0,"either M or m should be specified");
1364e3372554SBarry Smith   if ( M == PETSC_DECIDE && n == PETSC_DECIDE)SETERRQ(1,0,"either N or n should be specified");
1365cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1366cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
136779bdfe76SSatish Balay 
136879bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
136979bdfe76SSatish Balay     work[0] = m; work[1] = n;
137079bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
137179bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
137279bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
137379bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
137479bdfe76SSatish Balay   }
137579bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
137679bdfe76SSatish Balay     Mbs = M/bs;
1377e3372554SBarry Smith     if (Mbs*bs != M) SETERRQ(1,0,"No of global rows must be divisible by blocksize");
137879bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
137979bdfe76SSatish Balay     m   = mbs*bs;
138079bdfe76SSatish Balay   }
138179bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
138279bdfe76SSatish Balay     Nbs = N/bs;
1383e3372554SBarry Smith     if (Nbs*bs != N) SETERRQ(1,0,"No of global cols must be divisible by blocksize");
138479bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
138579bdfe76SSatish Balay     n   = nbs*bs;
138679bdfe76SSatish Balay   }
1387e3372554SBarry Smith   if (mbs*bs != m || nbs*bs != n) SETERRQ(1,0,"No of local rows, cols must be divisible by blocksize");
138879bdfe76SSatish Balay 
138979bdfe76SSatish Balay   b->m = m; B->m = m;
139079bdfe76SSatish Balay   b->n = n; B->n = n;
139179bdfe76SSatish Balay   b->N = N; B->N = N;
139279bdfe76SSatish Balay   b->M = M; B->M = M;
139379bdfe76SSatish Balay   b->bs  = bs;
139479bdfe76SSatish Balay   b->bs2 = bs*bs;
139579bdfe76SSatish Balay   b->mbs = mbs;
139679bdfe76SSatish Balay   b->nbs = nbs;
139779bdfe76SSatish Balay   b->Mbs = Mbs;
139879bdfe76SSatish Balay   b->Nbs = Nbs;
139979bdfe76SSatish Balay 
140079bdfe76SSatish Balay   /* build local table of row and column ownerships */
140179bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
140279bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14030ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
140479bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
140579bdfe76SSatish Balay   b->rowners[0] = 0;
140679bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
140779bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
140879bdfe76SSatish Balay   }
140979bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
141079bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
14114fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
14124fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
14134fa0d573SSatish Balay 
141479bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
141579bdfe76SSatish Balay   b->cowners[0] = 0;
141679bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
141779bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
141879bdfe76SSatish Balay   }
141979bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
142079bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
14214fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
14224fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
142379bdfe76SSatish Balay 
142479bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
142579bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
142679bdfe76SSatish Balay   PLogObjectParent(B,b->A);
142779bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
142879bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
142979bdfe76SSatish Balay   PLogObjectParent(B,b->B);
143079bdfe76SSatish Balay 
143179bdfe76SSatish Balay   /* build cache for off array entries formed */
143279bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
143390f02eecSBarry Smith   b->donotstash  = 0;
143479bdfe76SSatish Balay   b->colmap      = 0;
143579bdfe76SSatish Balay   b->garray      = 0;
143679bdfe76SSatish Balay   b->roworiented = 1;
143779bdfe76SSatish Balay 
143879bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
143979bdfe76SSatish Balay   b->lvec      = 0;
144079bdfe76SSatish Balay   b->Mvctx     = 0;
144179bdfe76SSatish Balay 
144279bdfe76SSatish Balay   /* stuff for MatGetRow() */
144379bdfe76SSatish Balay   b->rowindices   = 0;
144479bdfe76SSatish Balay   b->rowvalues    = 0;
144579bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
144679bdfe76SSatish Balay 
144779bdfe76SSatish Balay   *A = B;
144879bdfe76SSatish Balay   return 0;
144979bdfe76SSatish Balay }
1450026e39d0SSatish Balay 
14515615d1e5SSatish Balay #undef __FUNC__
14525615d1e5SSatish Balay #define __FUNC__ "MatConvertSameType_MPIBAIJ"
14530ac07820SSatish Balay static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
14540ac07820SSatish Balay {
14550ac07820SSatish Balay   Mat        mat;
14560ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
14570ac07820SSatish Balay   int        ierr, len=0, flg;
14580ac07820SSatish Balay 
14590ac07820SSatish Balay   *newmat       = 0;
14600ac07820SSatish Balay   PetscHeaderCreate(mat,_Mat,MAT_COOKIE,MATMPIBAIJ,matin->comm);
14610ac07820SSatish Balay   PLogObjectCreate(mat);
14620ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
14630ac07820SSatish Balay   PetscMemcpy(&mat->ops,&MatOps,sizeof(struct _MatOps));
14640ac07820SSatish Balay   mat->destroy    = MatDestroy_MPIBAIJ;
14650ac07820SSatish Balay   mat->view       = MatView_MPIBAIJ;
14660ac07820SSatish Balay   mat->factor     = matin->factor;
14670ac07820SSatish Balay   mat->assembled  = PETSC_TRUE;
14680ac07820SSatish Balay 
14690ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
14700ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
14710ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
14720ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
14730ac07820SSatish Balay 
14740ac07820SSatish Balay   a->bs  = oldmat->bs;
14750ac07820SSatish Balay   a->bs2 = oldmat->bs2;
14760ac07820SSatish Balay   a->mbs = oldmat->mbs;
14770ac07820SSatish Balay   a->nbs = oldmat->nbs;
14780ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
14790ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
14800ac07820SSatish Balay 
14810ac07820SSatish Balay   a->rstart       = oldmat->rstart;
14820ac07820SSatish Balay   a->rend         = oldmat->rend;
14830ac07820SSatish Balay   a->cstart       = oldmat->cstart;
14840ac07820SSatish Balay   a->cend         = oldmat->cend;
14850ac07820SSatish Balay   a->size         = oldmat->size;
14860ac07820SSatish Balay   a->rank         = oldmat->rank;
14870ac07820SSatish Balay   a->insertmode   = NOT_SET_VALUES;
14880ac07820SSatish Balay   a->rowvalues    = 0;
14890ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
14900ac07820SSatish Balay 
14910ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
14920ac07820SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
14930ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
14940ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
14950ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
14960ac07820SSatish Balay   if (oldmat->colmap) {
14970ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
14980ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
14990ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
15000ac07820SSatish Balay   } else a->colmap = 0;
15010ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
15020ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
15030ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
15040ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
15050ac07820SSatish Balay   } else a->garray = 0;
15060ac07820SSatish Balay 
15070ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
15080ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
15090ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
15100ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
15110ac07820SSatish Balay   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
15120ac07820SSatish Balay   PLogObjectParent(mat,a->A);
15130ac07820SSatish Balay   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
15140ac07820SSatish Balay   PLogObjectParent(mat,a->B);
15150ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
15160ac07820SSatish Balay   if (flg) {
15170ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
15180ac07820SSatish Balay   }
15190ac07820SSatish Balay   *newmat = mat;
15200ac07820SSatish Balay   return 0;
15210ac07820SSatish Balay }
152257b952d6SSatish Balay 
152357b952d6SSatish Balay #include "sys.h"
152457b952d6SSatish Balay 
15255615d1e5SSatish Balay #undef __FUNC__
15265615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
152757b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
152857b952d6SSatish Balay {
152957b952d6SSatish Balay   Mat          A;
153057b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
153157b952d6SSatish Balay   Scalar       *vals,*buf;
153257b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
153357b952d6SSatish Balay   MPI_Status   status;
1534cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
153557b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
153657b952d6SSatish Balay   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
153757b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
153857b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
153957b952d6SSatish Balay 
154057b952d6SSatish Balay 
154157b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
154257b952d6SSatish Balay   bs2  = bs*bs;
154357b952d6SSatish Balay 
154457b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
154557b952d6SSatish Balay   if (!rank) {
154657b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
154757b952d6SSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,BINARY_INT); CHKERRQ(ierr);
1548e3372554SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not matrix object");
154957b952d6SSatish Balay   }
155057b952d6SSatish Balay 
155157b952d6SSatish Balay   MPI_Bcast(header+1,3,MPI_INT,0,comm);
155257b952d6SSatish Balay   M = header[1]; N = header[2];
155357b952d6SSatish Balay 
1554e3372554SBarry Smith   if (M != N) SETERRQ(1,0,"Can only do square matrices");
155557b952d6SSatish Balay 
155657b952d6SSatish Balay   /*
155757b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
155857b952d6SSatish Balay      divisible by the blocksize
155957b952d6SSatish Balay   */
156057b952d6SSatish Balay   Mbs        = M/bs;
156157b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
156257b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
156357b952d6SSatish Balay   else                  Mbs++;
156457b952d6SSatish Balay   if (extra_rows &&!rank) {
1565b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
156657b952d6SSatish Balay   }
1567537820f0SBarry Smith 
156857b952d6SSatish Balay   /* determine ownership of all rows */
156957b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
157057b952d6SSatish Balay   m   = mbs * bs;
1571cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
1572cee3aa6bSSatish Balay   browners = rowners + size + 1;
157357b952d6SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
157457b952d6SSatish Balay   rowners[0] = 0;
1575cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
1576cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
157757b952d6SSatish Balay   rstart = rowners[rank];
157857b952d6SSatish Balay   rend   = rowners[rank+1];
157957b952d6SSatish Balay 
158057b952d6SSatish Balay   /* distribute row lengths to all processors */
158157b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
158257b952d6SSatish Balay   if (!rank) {
158357b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
158457b952d6SSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
158557b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
158657b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
1587cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
1588cee3aa6bSSatish Balay     MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
158957b952d6SSatish Balay     PetscFree(sndcounts);
159057b952d6SSatish Balay   }
159157b952d6SSatish Balay   else {
159257b952d6SSatish Balay     MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);
159357b952d6SSatish Balay   }
159457b952d6SSatish Balay 
159557b952d6SSatish Balay   if (!rank) {
159657b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
159757b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
159857b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
159957b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
160057b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
160157b952d6SSatish Balay         procsnz[i] += rowlengths[j];
160257b952d6SSatish Balay       }
160357b952d6SSatish Balay     }
160457b952d6SSatish Balay     PetscFree(rowlengths);
160557b952d6SSatish Balay 
160657b952d6SSatish Balay     /* determine max buffer needed and allocate it */
160757b952d6SSatish Balay     maxnz = 0;
160857b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
160957b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
161057b952d6SSatish Balay     }
161157b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
161257b952d6SSatish Balay 
161357b952d6SSatish Balay     /* read in my part of the matrix column indices  */
161457b952d6SSatish Balay     nz = procsnz[0];
161557b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
161657b952d6SSatish Balay     mycols = ibuf;
1617cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
161857b952d6SSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,BINARY_INT); CHKERRQ(ierr);
1619cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
1620cee3aa6bSSatish Balay 
162157b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
162257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
162357b952d6SSatish Balay       nz = procsnz[i];
162457b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
162557b952d6SSatish Balay       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
162657b952d6SSatish Balay     }
162757b952d6SSatish Balay     /* read in the stuff for the last proc */
162857b952d6SSatish Balay     if ( size != 1 ) {
162957b952d6SSatish Balay       nz = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
163057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,BINARY_INT); CHKERRQ(ierr);
163157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
1632cee3aa6bSSatish Balay       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
163357b952d6SSatish Balay     }
163457b952d6SSatish Balay     PetscFree(cols);
163557b952d6SSatish Balay   }
163657b952d6SSatish Balay   else {
163757b952d6SSatish Balay     /* determine buffer space needed for message */
163857b952d6SSatish Balay     nz = 0;
163957b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
164057b952d6SSatish Balay       nz += locrowlens[i];
164157b952d6SSatish Balay     }
164257b952d6SSatish Balay     ibuf = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
164357b952d6SSatish Balay     mycols = ibuf;
164457b952d6SSatish Balay     /* receive message of column indices*/
164557b952d6SSatish Balay     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
164657b952d6SSatish Balay     MPI_Get_count(&status,MPI_INT,&maxnz);
1647e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
164857b952d6SSatish Balay   }
164957b952d6SSatish Balay 
165057b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
1651cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
1652cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
165357b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
1654cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
165557b952d6SSatish Balay   masked1 = mask    + Mbs;
165657b952d6SSatish Balay   masked2 = masked1 + Mbs;
165757b952d6SSatish Balay   rowcount = 0; nzcount = 0;
165857b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
165957b952d6SSatish Balay     dcount  = 0;
166057b952d6SSatish Balay     odcount = 0;
166157b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
166257b952d6SSatish Balay       kmax = locrowlens[rowcount];
166357b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
166457b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
166557b952d6SSatish Balay         if (!mask[tmp]) {
166657b952d6SSatish Balay           mask[tmp] = 1;
166757b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
166857b952d6SSatish Balay           else masked1[dcount++] = tmp;
166957b952d6SSatish Balay         }
167057b952d6SSatish Balay       }
167157b952d6SSatish Balay       rowcount++;
167257b952d6SSatish Balay     }
1673cee3aa6bSSatish Balay 
167457b952d6SSatish Balay     dlens[i]  = dcount;
167557b952d6SSatish Balay     odlens[i] = odcount;
1676cee3aa6bSSatish Balay 
167757b952d6SSatish Balay     /* zero out the mask elements we set */
167857b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
167957b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
168057b952d6SSatish Balay   }
1681cee3aa6bSSatish Balay 
168257b952d6SSatish Balay   /* create our matrix */
1683537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
1684537820f0SBarry Smith          CHKERRQ(ierr);
168557b952d6SSatish Balay   A = *newmat;
16866d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
168757b952d6SSatish Balay 
168857b952d6SSatish Balay   if (!rank) {
168957b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
169057b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
169157b952d6SSatish Balay     nz = procsnz[0];
169257b952d6SSatish Balay     vals = buf;
1693cee3aa6bSSatish Balay     mycols = ibuf;
1694cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
169557b952d6SSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
1696cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
1697537820f0SBarry Smith 
169857b952d6SSatish Balay     /* insert into matrix */
169957b952d6SSatish Balay     jj      = rstart*bs;
170057b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
170157b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
170257b952d6SSatish Balay       mycols += locrowlens[i];
170357b952d6SSatish Balay       vals   += locrowlens[i];
170457b952d6SSatish Balay       jj++;
170557b952d6SSatish Balay     }
170657b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
170757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
170857b952d6SSatish Balay       nz = procsnz[i];
170957b952d6SSatish Balay       vals = buf;
171057b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
171157b952d6SSatish Balay       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
171257b952d6SSatish Balay     }
171357b952d6SSatish Balay     /* the last proc */
171457b952d6SSatish Balay     if ( size != 1 ){
171557b952d6SSatish Balay       nz = procsnz[i] - extra_rows;
1716cee3aa6bSSatish Balay       vals = buf;
171757b952d6SSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,BINARY_SCALAR); CHKERRQ(ierr);
171857b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
1719cee3aa6bSSatish Balay       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
172057b952d6SSatish Balay     }
172157b952d6SSatish Balay     PetscFree(procsnz);
172257b952d6SSatish Balay   }
172357b952d6SSatish Balay   else {
172457b952d6SSatish Balay     /* receive numeric values */
172557b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
172657b952d6SSatish Balay 
172757b952d6SSatish Balay     /* receive message of values*/
172857b952d6SSatish Balay     vals = buf;
1729cee3aa6bSSatish Balay     mycols = ibuf;
173057b952d6SSatish Balay     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
173157b952d6SSatish Balay     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1732e3372554SBarry Smith     if (maxnz != nz) SETERRQ(1,0,"something is wrong with file");
173357b952d6SSatish Balay 
173457b952d6SSatish Balay     /* insert into matrix */
173557b952d6SSatish Balay     jj      = rstart*bs;
1736cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
173757b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
173857b952d6SSatish Balay       mycols += locrowlens[i];
173957b952d6SSatish Balay       vals   += locrowlens[i];
174057b952d6SSatish Balay       jj++;
174157b952d6SSatish Balay     }
174257b952d6SSatish Balay   }
174357b952d6SSatish Balay   PetscFree(locrowlens);
174457b952d6SSatish Balay   PetscFree(buf);
174557b952d6SSatish Balay   PetscFree(ibuf);
174657b952d6SSatish Balay   PetscFree(rowners);
174757b952d6SSatish Balay   PetscFree(dlens);
1748cee3aa6bSSatish Balay   PetscFree(mask);
17496d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17506d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
175157b952d6SSatish Balay   return 0;
175257b952d6SSatish Balay }
175357b952d6SSatish Balay 
175457b952d6SSatish Balay 
1755