xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 90f02eec332fcca4c33b4e7b21cabed76bf0614e)
18016bdd1SSatish Balay #ifndef lint
2*90f02eecSBarry Smith static char vcid[] = "$Id: mmbaij.c,v 1.7 1996/08/15 12:48:12 bsmith Exp bsmith $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
118016bdd1SSatish Balay 
12d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
138016bdd1SSatish Balay {
14d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
15d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
16d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17537820f0SBarry Smith   int        col,bs = baij->bs,*tmp,*stmp;
188016bdd1SSatish Balay   IS         from,to;
198016bdd1SSatish Balay   Vec        gvec;
208016bdd1SSatish Balay 
218016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
22d9653453SSatish Balay   /* mark those columns that are in baij->B */
23d9653453SSatish Balay   indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices);
24d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
25d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
268016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
27d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
28d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
298016bdd1SSatish Balay     }
308016bdd1SSatish Balay   }
318016bdd1SSatish Balay 
328016bdd1SSatish Balay   /* form array of columns we need */
338016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
34d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
358016bdd1SSatish Balay   ec = 0;
36d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
378016bdd1SSatish Balay     if (indices[i]) garray[ec++] = i;
388016bdd1SSatish Balay   }
398016bdd1SSatish Balay 
408016bdd1SSatish Balay   /* make indices now point into garray */
418016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
42d9653453SSatish Balay     indices[garray[i]] = i;
438016bdd1SSatish Balay   }
448016bdd1SSatish Balay 
458016bdd1SSatish Balay   /* compact out the extra columns in B */
46d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
478016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
48d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
498016bdd1SSatish Balay     }
508016bdd1SSatish Balay   }
51d9653453SSatish Balay   B->nbs = ec;
52d9653453SSatish Balay   B->n   = ec*B->bs;
538016bdd1SSatish Balay   PetscFree(indices);
548016bdd1SSatish Balay 
5557b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
5657b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
57d9653453SSatish Balay   }
588016bdd1SSatish Balay   /* create local vector that is used to scatter into */
59d9653453SSatish Balay   ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
608016bdd1SSatish Balay 
61c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
62c16cb8f2SBarry Smith 
63537820f0SBarry Smith   /* ierr = ISCreateGeneral(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
64c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
65c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
66c16cb8f2SBarry Smith   }
67537820f0SBarry Smith   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
68c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
69c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
70c16cb8f2SBarry Smith   }
71c16cb8f2SBarry Smith 
72537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
73537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
74537820f0SBarry Smith   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
75537820f0SBarry Smith   PetscFree(stmp);
768016bdd1SSatish Balay 
778016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
788016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
798016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
808016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
81d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
828016bdd1SSatish Balay 
838016bdd1SSatish Balay   /* gnerate the scatter context */
84d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
85*90f02eecSBarry Smith 
86*90f02eecSBarry Smith   /*
87*90f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
88*90f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
89*90f02eecSBarry Smith     the matrix.
90*90f02eecSBarry Smith   */
91*90f02eecSBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_ALL,baij->Mvctx);CHKERRQ(ierr);
92*90f02eecSBarry Smith   MPI_Barrier(mat->comm);
93*90f02eecSBarry Smith 
94d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
95d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
968016bdd1SSatish Balay   PLogObjectParent(mat,from);
978016bdd1SSatish Balay   PLogObjectParent(mat,to);
98d9653453SSatish Balay   baij->garray = garray;
998016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1008016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
1018016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
1028016bdd1SSatish Balay   ierr = VecDestroy(gvec);
103d9653453SSatish Balay   PetscFree(tmp);
1048016bdd1SSatish Balay   return 0;
1058016bdd1SSatish Balay }
1068016bdd1SSatish Balay 
1078016bdd1SSatish Balay 
1088016bdd1SSatish Balay /*
109d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1108016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1118016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1128016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1138016bdd1SSatish Balay 
1148016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1158016bdd1SSatish Balay    they are sloppy.
1168016bdd1SSatish Balay */
117d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1188016bdd1SSatish Balay {
119d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
120d9653453SSatish Balay   Mat        B = baij->B,Bnew;
121d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
122d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
123d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
124d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1258016bdd1SSatish Balay 
1268016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
127d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
128d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
129d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
130d9653453SSatish Balay   if (baij->colmap) {
131d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
132d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
1338016bdd1SSatish Balay   }
1348016bdd1SSatish Balay 
1358016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1366d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1376d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1388016bdd1SSatish Balay 
1398016bdd1SSatish Balay   /* invent new B and copy stuff over */
140d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
141d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
142d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1438016bdd1SSatish Balay   }
144d9653453SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1458016bdd1SSatish Balay   PetscFree(nz);
146d9653453SSatish Balay 
147d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
148d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
149d9653453SSatish Balay     rvals[0] = bs*i;
150d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
151d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
15257b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
153d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
154d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
155d9653453SSatish Balay         col++;
1568016bdd1SSatish Balay       }
1578016bdd1SSatish Balay     }
158d9653453SSatish Balay   }
159d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
16057b952d6SSatish Balay   PetscFree(rvals);
1618016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1628016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1638016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
164d9653453SSatish Balay   baij->B = Bnew;
1658016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1668016bdd1SSatish Balay   return 0;
1678016bdd1SSatish Balay }
1688016bdd1SSatish Balay 
1698016bdd1SSatish Balay 
170