xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*3a40ed3dSBarry Smith static char vcid[] = "$Id: mmbaij.c,v 1.14 1997/07/09 20:55:26 balay Exp bsmith $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
118016bdd1SSatish Balay 
125615d1e5SSatish Balay #undef __FUNC__
135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
158016bdd1SSatish Balay {
16d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19537820f0SBarry Smith   int        col,bs = baij->bs,*tmp,*stmp;
208016bdd1SSatish Balay   IS         from,to;
218016bdd1SSatish Balay   Vec        gvec;
228016bdd1SSatish Balay 
23*3a40ed3dSBarry Smith   PetscFunctionBegin;
248016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
25d9653453SSatish Balay   /* mark those columns that are in baij->B */
269dbe0bc3SSatish Balay   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
27d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
28d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
298016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
30d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
31d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
328016bdd1SSatish Balay     }
338016bdd1SSatish Balay   }
348016bdd1SSatish Balay 
358016bdd1SSatish Balay   /* form array of columns we need */
368016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
37d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
388016bdd1SSatish Balay   ec = 0;
39d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
408016bdd1SSatish Balay     if (indices[i]) garray[ec++] = i;
418016bdd1SSatish Balay   }
428016bdd1SSatish Balay 
438016bdd1SSatish Balay   /* make indices now point into garray */
448016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
45d9653453SSatish Balay     indices[garray[i]] = i;
468016bdd1SSatish Balay   }
478016bdd1SSatish Balay 
488016bdd1SSatish Balay   /* compact out the extra columns in B */
49d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
508016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
51d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
528016bdd1SSatish Balay     }
538016bdd1SSatish Balay   }
54d9653453SSatish Balay   B->nbs = ec;
55d9653453SSatish Balay   B->n   = ec*B->bs;
568016bdd1SSatish Balay   PetscFree(indices);
578016bdd1SSatish Balay 
5857b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
5957b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
60d9653453SSatish Balay   }
618016bdd1SSatish Balay   /* create local vector that is used to scatter into */
62029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
638016bdd1SSatish Balay 
64c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
65c16cb8f2SBarry Smith 
66029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
67c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
68c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
69c16cb8f2SBarry Smith   }
70029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
71c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
72c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
73c16cb8f2SBarry Smith   }
74c16cb8f2SBarry Smith 
75537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
76537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
77029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
78537820f0SBarry Smith   PetscFree(stmp);
798016bdd1SSatish Balay 
808016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
818016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
828016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
838016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
84d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
858016bdd1SSatish Balay 
868016bdd1SSatish Balay   /* gnerate the scatter context */
87d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
8890f02eecSBarry Smith 
8990f02eecSBarry Smith   /*
9090f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
9190f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
9290f02eecSBarry Smith     the matrix.
9390f02eecSBarry Smith   */
9443a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
9590f02eecSBarry Smith   MPI_Barrier(mat->comm);
9690f02eecSBarry Smith 
97d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
98d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
998016bdd1SSatish Balay   PLogObjectParent(mat,from);
1008016bdd1SSatish Balay   PLogObjectParent(mat,to);
101d9653453SSatish Balay   baij->garray = garray;
1028016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1038016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
1048016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
1058016bdd1SSatish Balay   ierr = VecDestroy(gvec);
106d9653453SSatish Balay   PetscFree(tmp);
107*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1088016bdd1SSatish Balay }
1098016bdd1SSatish Balay 
1108016bdd1SSatish Balay 
1118016bdd1SSatish Balay /*
112d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1138016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1148016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1158016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1168016bdd1SSatish Balay 
1178016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1188016bdd1SSatish Balay    they are sloppy.
1198016bdd1SSatish Balay */
1205615d1e5SSatish Balay #undef __FUNC__
1215615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ"
122d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1238016bdd1SSatish Balay {
124d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
125d9653453SSatish Balay   Mat        B = baij->B,Bnew;
126d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
127d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
128d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
129d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1308016bdd1SSatish Balay 
131*3a40ed3dSBarry Smith   PetscFunctionBegin;
1328016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
133d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
134d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
135d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
136d9653453SSatish Balay   if (baij->colmap) {
137d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
138d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
1398016bdd1SSatish Balay   }
1408016bdd1SSatish Balay 
1418016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1426d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1436d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1448016bdd1SSatish Balay 
1458016bdd1SSatish Balay   /* invent new B and copy stuff over */
146d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
147d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
148d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1498016bdd1SSatish Balay   }
150029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1518016bdd1SSatish Balay   PetscFree(nz);
152d9653453SSatish Balay 
153d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
154d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
155d9653453SSatish Balay     rvals[0] = bs*i;
156d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
157d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
15857b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
159d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
160d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
161d9653453SSatish Balay         col++;
1628016bdd1SSatish Balay       }
1638016bdd1SSatish Balay     }
164d9653453SSatish Balay   }
165d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
16657b952d6SSatish Balay   PetscFree(rvals);
1678016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1688016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1698016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
170d9653453SSatish Balay   baij->B = Bnew;
1718016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
172*3a40ed3dSBarry Smith   PetscFunctionReturn(0);
1738016bdd1SSatish Balay }
1748016bdd1SSatish Balay 
1758016bdd1SSatish Balay 
176