xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 5615d1e584023db9367fb782d85b1b4ebbb8df18)
18016bdd1SSatish Balay #ifndef lint
2*5615d1e5SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.10 1996/12/17 23:44:00 balay Exp balay $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
118016bdd1SSatish Balay 
12*5615d1e5SSatish Balay #undef __FUNC__
13*5615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
158016bdd1SSatish Balay {
16d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19537820f0SBarry Smith   int        col,bs = baij->bs,*tmp,*stmp;
208016bdd1SSatish Balay   IS         from,to;
218016bdd1SSatish Balay   Vec        gvec;
228016bdd1SSatish Balay 
238016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
24d9653453SSatish Balay   /* mark those columns that are in baij->B */
25d9653453SSatish Balay   indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices);
26d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
27d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
288016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
29d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
30d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
318016bdd1SSatish Balay     }
328016bdd1SSatish Balay   }
338016bdd1SSatish Balay 
348016bdd1SSatish Balay   /* form array of columns we need */
358016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
36d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
378016bdd1SSatish Balay   ec = 0;
38d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
398016bdd1SSatish Balay     if (indices[i]) garray[ec++] = i;
408016bdd1SSatish Balay   }
418016bdd1SSatish Balay 
428016bdd1SSatish Balay   /* make indices now point into garray */
438016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
44d9653453SSatish Balay     indices[garray[i]] = i;
458016bdd1SSatish Balay   }
468016bdd1SSatish Balay 
478016bdd1SSatish Balay   /* compact out the extra columns in B */
48d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
498016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
50d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
518016bdd1SSatish Balay     }
528016bdd1SSatish Balay   }
53d9653453SSatish Balay   B->nbs = ec;
54d9653453SSatish Balay   B->n   = ec*B->bs;
558016bdd1SSatish Balay   PetscFree(indices);
568016bdd1SSatish Balay 
5757b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
5857b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
59d9653453SSatish Balay   }
608016bdd1SSatish Balay   /* create local vector that is used to scatter into */
61d9653453SSatish Balay   ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
628016bdd1SSatish Balay 
63c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
64c16cb8f2SBarry Smith 
65537820f0SBarry Smith   /* ierr = ISCreateGeneral(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
66c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
67c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
68c16cb8f2SBarry Smith   }
69537820f0SBarry Smith   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
70c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
71c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
72c16cb8f2SBarry Smith   }
73c16cb8f2SBarry Smith 
74537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
75537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
76537820f0SBarry Smith   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
77537820f0SBarry Smith   PetscFree(stmp);
788016bdd1SSatish Balay 
798016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
808016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
818016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
828016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
83d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
848016bdd1SSatish Balay 
858016bdd1SSatish Balay   /* gnerate the scatter context */
86d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
8790f02eecSBarry Smith 
8890f02eecSBarry Smith   /*
8990f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
9090f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
9190f02eecSBarry Smith     the matrix.
9290f02eecSBarry Smith   */
9343a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
9490f02eecSBarry Smith   MPI_Barrier(mat->comm);
9590f02eecSBarry Smith 
96d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
97d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
988016bdd1SSatish Balay   PLogObjectParent(mat,from);
998016bdd1SSatish Balay   PLogObjectParent(mat,to);
100d9653453SSatish Balay   baij->garray = garray;
1018016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1028016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
1038016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
1048016bdd1SSatish Balay   ierr = VecDestroy(gvec);
105d9653453SSatish Balay   PetscFree(tmp);
1068016bdd1SSatish Balay   return 0;
1078016bdd1SSatish Balay }
1088016bdd1SSatish Balay 
1098016bdd1SSatish Balay 
1108016bdd1SSatish Balay /*
111d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1128016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1138016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1148016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1158016bdd1SSatish Balay 
1168016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1178016bdd1SSatish Balay    they are sloppy.
1188016bdd1SSatish Balay */
119*5615d1e5SSatish Balay #undef __FUNC__
120*5615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ"
121d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1228016bdd1SSatish Balay {
123d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
124d9653453SSatish Balay   Mat        B = baij->B,Bnew;
125d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
126d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
127d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
128d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1298016bdd1SSatish Balay 
1308016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
131d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
132d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
133d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
134d9653453SSatish Balay   if (baij->colmap) {
135d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
136d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
1378016bdd1SSatish Balay   }
1388016bdd1SSatish Balay 
1398016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1406d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1416d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1428016bdd1SSatish Balay 
1438016bdd1SSatish Balay   /* invent new B and copy stuff over */
144d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
145d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
146d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1478016bdd1SSatish Balay   }
148d9653453SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1498016bdd1SSatish Balay   PetscFree(nz);
150d9653453SSatish Balay 
151d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
152d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
153d9653453SSatish Balay     rvals[0] = bs*i;
154d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
155d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
15657b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
157d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
158d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
159d9653453SSatish Balay         col++;
1608016bdd1SSatish Balay       }
1618016bdd1SSatish Balay     }
162d9653453SSatish Balay   }
163d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
16457b952d6SSatish Balay   PetscFree(rvals);
1658016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1668016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1678016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
168d9653453SSatish Balay   baij->B = Bnew;
1698016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1708016bdd1SSatish Balay   return 0;
1718016bdd1SSatish Balay }
1728016bdd1SSatish Balay 
1738016bdd1SSatish Balay 
174