xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision c16cb8f2de640b3af1673495cb6d811eff24116a)
18016bdd1SSatish Balay #ifndef lint
2*c16cb8f2SBarry Smith static char vcid[] = "$Id: mmbaij.c,v 1.4 1996/07/08 22:20:04 bsmith Exp bsmith $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
9d9653453SSatish Balay #include "mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
11d9653453SSatish Balay #include "../seq/baij.h"
128016bdd1SSatish Balay 
13d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
148016bdd1SSatish Balay {
15d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
16d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
17d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
1857b952d6SSatish Balay   int        col,bs = baij->bs,*tmp;
198016bdd1SSatish Balay   IS         from,to;
208016bdd1SSatish Balay   Vec        gvec;
218016bdd1SSatish Balay 
228016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
23d9653453SSatish Balay   /* mark those columns that are in baij->B */
24d9653453SSatish Balay   indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices);
25d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
26d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
278016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
28d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
29d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
308016bdd1SSatish Balay     }
318016bdd1SSatish Balay   }
328016bdd1SSatish Balay 
338016bdd1SSatish Balay   /* form array of columns we need */
348016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
35d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
368016bdd1SSatish Balay   ec = 0;
37d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
388016bdd1SSatish Balay     if (indices[i]) garray[ec++] = i;
398016bdd1SSatish Balay   }
408016bdd1SSatish Balay 
418016bdd1SSatish Balay   /* make indices now point into garray */
428016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
43d9653453SSatish Balay     indices[garray[i]] = i;
448016bdd1SSatish Balay   }
458016bdd1SSatish Balay 
468016bdd1SSatish Balay   /* compact out the extra columns in B */
47d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
488016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
49d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
508016bdd1SSatish Balay     }
518016bdd1SSatish Balay   }
52d9653453SSatish Balay   B->nbs = ec;
53d9653453SSatish Balay   B->n   = ec*B->bs;
548016bdd1SSatish Balay   PetscFree(indices);
558016bdd1SSatish Balay 
5657b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
5757b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
58d9653453SSatish Balay   }
598016bdd1SSatish Balay   /* create local vector that is used to scatter into */
60d9653453SSatish Balay   ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
618016bdd1SSatish Balay 
62*c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
63*c16cb8f2SBarry Smith 
64*c16cb8f2SBarry Smith   /* ierr = ISCreateSeq(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
65*c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
66*c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
67*c16cb8f2SBarry Smith   }
68*c16cb8f2SBarry Smith   ierr = ISCreateBlockSeq(MPI_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
69*c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
70*c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
71*c16cb8f2SBarry Smith   }
72*c16cb8f2SBarry Smith 
73d9653453SSatish Balay   ierr = ISCreateStrideSeq(MPI_COMM_SELF,ec*bs,0,1,&to);CHKERRQ(ierr);
748016bdd1SSatish Balay 
758016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
768016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
778016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
788016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
79d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
808016bdd1SSatish Balay 
818016bdd1SSatish Balay   /* gnerate the scatter context */
82d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
83d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
84d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
858016bdd1SSatish Balay   PLogObjectParent(mat,from);
868016bdd1SSatish Balay   PLogObjectParent(mat,to);
87d9653453SSatish Balay   baij->garray = garray;
888016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
898016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
908016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
918016bdd1SSatish Balay   ierr = VecDestroy(gvec);
92d9653453SSatish Balay   PetscFree(tmp);
938016bdd1SSatish Balay   return 0;
948016bdd1SSatish Balay }
958016bdd1SSatish Balay 
968016bdd1SSatish Balay 
978016bdd1SSatish Balay /*
98d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
998016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1008016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1018016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1028016bdd1SSatish Balay 
1038016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1048016bdd1SSatish Balay    they are sloppy.
1058016bdd1SSatish Balay */
106d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1078016bdd1SSatish Balay {
108d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
109d9653453SSatish Balay   Mat        B = baij->B,Bnew;
110d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
111d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
112d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
113d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1148016bdd1SSatish Balay 
1158016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
116d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
117d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
118d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
119d9653453SSatish Balay   if (baij->colmap) {
120d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
121d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
1228016bdd1SSatish Balay   }
1238016bdd1SSatish Balay 
1248016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1256d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1266d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1278016bdd1SSatish Balay 
1288016bdd1SSatish Balay   /* invent new B and copy stuff over */
129d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
130d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
131d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1328016bdd1SSatish Balay   }
133d9653453SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1348016bdd1SSatish Balay   PetscFree(nz);
135d9653453SSatish Balay 
136d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
137d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
138d9653453SSatish Balay     rvals[0] = bs*i;
139d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
140d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
14157b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
142d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
143d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
144d9653453SSatish Balay         col++;
1458016bdd1SSatish Balay       }
1468016bdd1SSatish Balay     }
147d9653453SSatish Balay   }
148d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
14957b952d6SSatish Balay   PetscFree(rvals);
1508016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1518016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1528016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
153d9653453SSatish Balay   baij->B = Bnew;
1548016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1558016bdd1SSatish Balay   return 0;
1568016bdd1SSatish Balay }
1578016bdd1SSatish Balay 
1588016bdd1SSatish Balay 
159