xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision fa46199cd6736f835a9dfd5fb06fcbb3763bab4e)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*fa46199cSSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.21 1999/01/31 16:07:13 bsmith Exp balay $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
118016bdd1SSatish Balay 
125615d1e5SSatish Balay #undef __FUNC__
135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
158016bdd1SSatish Balay {
16d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19537820f0SBarry Smith   int        col,bs = baij->bs,*tmp,*stmp;
208016bdd1SSatish Balay   IS         from,to;
218016bdd1SSatish Balay   Vec        gvec;
2273a2e727SSatish Balay #if defined (USE_CTABLE)
2373a2e727SSatish Balay   Table      gid1_lid1;
2473a2e727SSatish Balay   CTablePos  tpos;
2573a2e727SSatish Balay   int        gid, lid;
2673a2e727SSatish Balay #endif
278016bdd1SSatish Balay 
283a40ed3dSBarry Smith   PetscFunctionBegin;
2973a2e727SSatish Balay 
3073a2e727SSatish Balay #if defined (USE_CTABLE)
3173a2e727SSatish Balay   /* use a table - Mark Adams */
32*fa46199cSSatish Balay   TableCreate(B->mbs,&gid1_lid1);
3373a2e727SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
3473a2e727SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
35*fa46199cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
36*fa46199cSSatish Balay       ierr = TableFind(gid1_lid1,gid1,&data) ; CHKERRQ(ierr);
37*fa46199cSSatish Balay       if (!data) {
3873a2e727SSatish Balay         /* one based table */
3973a2e727SSatish Balay         ierr = TableAdd(gid1_lid1,gid1,++ec); CHKERRQ(ierr);
4073a2e727SSatish Balay       }
4173a2e727SSatish Balay     }
4273a2e727SSatish Balay   }
4373a2e727SSatish Balay   /* form array of columns we need */
4473a2e727SSatish Balay   garray = (int *)PetscMalloc((ec+1)*sizeof(int)); CHKPTRQ(garray);
4573a2e727SSatish Balay   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int)); CHKPTRQ(tmp);
4673a2e727SSatish Balay   ierr   = TableGetHeadPosition(gid1_lid1,&tpos); CHKERRQ(ierr);
4773a2e727SSatish Balay   while (tpos) {
4873a2e727SSatish Balay     ierr = TableGetNext(gid1_lid1,&tpos,&gid,&lid); CHKERRQ(ierr);
4973a2e727SSatish Balay     gid--; lid--;
5073a2e727SSatish Balay     garray[lid] = gid;
5173a2e727SSatish Balay   }
520064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray); CHKERRQ(ierr);
530064e2bbSSatish Balay   /* qsort( garray, ec, sizeof(int), intcomparcarc ); */
5473a2e727SSatish Balay   TableRemoveAll(gid1_lid1);
5573a2e727SSatish Balay   for ( i=0; i<ec; i++ ) {
5673a2e727SSatish Balay     ierr = TableAdd(gid1_lid1,garray[i]+1,i+1); CHKERRQ(ierr);
5773a2e727SSatish Balay   }
5873a2e727SSatish Balay   /* compact out the extra columns in B */
5973a2e727SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
6073a2e727SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
6173a2e727SSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
62*fa46199cSSatish Balay       ierr = TableFind(gid1_lid1,gid1,&lid); CHKERRQ(ierr);
63*fa46199cSSatish Balay       lid --;
6473a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6573a2e727SSatish Balay     }
6673a2e727SSatish Balay   }
6773a2e727SSatish Balay   B->nbs = ec;
6873a2e727SSatish Balay   B->n   = ec*B->bs;
6973a2e727SSatish Balay   TableDelete(gid1_lid1);
7073a2e727SSatish Balay   /* Mark Adams */
7173a2e727SSatish Balay #else
728016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
73d9653453SSatish Balay   /* mark those columns that are in baij->B */
749dbe0bc3SSatish Balay   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
75d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
76d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
778016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
78d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
79d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
808016bdd1SSatish Balay     }
818016bdd1SSatish Balay   }
828016bdd1SSatish Balay 
838016bdd1SSatish Balay   /* form array of columns we need */
848016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
85d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
868016bdd1SSatish Balay   ec = 0;
87d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
880bdbc534SSatish Balay     if (indices[i]) {
890bdbc534SSatish Balay       garray[ec++] = i;
900bdbc534SSatish Balay     }
918016bdd1SSatish Balay   }
928016bdd1SSatish Balay 
938016bdd1SSatish Balay   /* make indices now point into garray */
948016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
95d9653453SSatish Balay     indices[garray[i]] = i;
968016bdd1SSatish Balay   }
978016bdd1SSatish Balay 
988016bdd1SSatish Balay   /* compact out the extra columns in B */
99d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
1008016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
101d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
1028016bdd1SSatish Balay     }
1038016bdd1SSatish Balay   }
104d9653453SSatish Balay   B->nbs = ec;
105d9653453SSatish Balay   B->n   = ec*B->bs;
1068016bdd1SSatish Balay   PetscFree(indices);
10773a2e727SSatish Balay #endif
1088016bdd1SSatish Balay 
10957b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
11057b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
111d9653453SSatish Balay   }
1128016bdd1SSatish Balay   /* create local vector that is used to scatter into */
113029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
1148016bdd1SSatish Balay 
115c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
116c16cb8f2SBarry Smith 
117029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
118c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
119c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
120c16cb8f2SBarry Smith   }
121029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
122c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
123c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
124c16cb8f2SBarry Smith   }
125c16cb8f2SBarry Smith 
126537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
127537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
128029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
129537820f0SBarry Smith   PetscFree(stmp);
1308016bdd1SSatish Balay 
1318016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1328016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1338016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1348016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
135d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
1368016bdd1SSatish Balay 
1378016bdd1SSatish Balay   /* gnerate the scatter context */
138d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13990f02eecSBarry Smith 
14090f02eecSBarry Smith   /*
14190f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
14290f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
14390f02eecSBarry Smith     the matrix.
14490f02eecSBarry Smith   */
14543a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
146ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14790f02eecSBarry Smith 
148d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
149d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
1508016bdd1SSatish Balay   PLogObjectParent(mat,from);
1518016bdd1SSatish Balay   PLogObjectParent(mat,to);
152d9653453SSatish Balay   baij->garray = garray;
1538016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1548016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
1558016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
1568016bdd1SSatish Balay   ierr = VecDestroy(gvec);
157d9653453SSatish Balay   PetscFree(tmp);
1583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1598016bdd1SSatish Balay }
1608016bdd1SSatish Balay 
1618016bdd1SSatish Balay 
1628016bdd1SSatish Balay /*
163d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1648016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1658016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1668016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1678016bdd1SSatish Balay 
1688016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1698016bdd1SSatish Balay    they are sloppy.
1708016bdd1SSatish Balay */
1715615d1e5SSatish Balay #undef __FUNC__
1725615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ"
173d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1748016bdd1SSatish Balay {
175d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
176d9653453SSatish Balay   Mat        B = baij->B,Bnew;
177d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
178d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
179d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
180d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1818016bdd1SSatish Balay 
1823a40ed3dSBarry Smith   PetscFunctionBegin;
1838016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
184d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
185d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
186d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
187d9653453SSatish Balay   if (baij->colmap) {
18848e59246SSatish Balay #if defined (USE_CTABLE)
18948e59246SSatish Balay     TableDelete(baij->colmap); baij->colmap = 0;
19048e59246SSatish Balay #else
191d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
192d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
19348e59246SSatish Balay #endif
1948016bdd1SSatish Balay   }
1958016bdd1SSatish Balay 
1968016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1976d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1986d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1998016bdd1SSatish Balay 
2008016bdd1SSatish Balay   /* invent new B and copy stuff over */
201d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
202d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
203d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2048016bdd1SSatish Balay   }
205029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
2068016bdd1SSatish Balay   PetscFree(nz);
207d9653453SSatish Balay 
208d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
209d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
210d9653453SSatish Balay     rvals[0] = bs*i;
211d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
212d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
21357b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
214d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
21583271157SBarry Smith         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr);
216d9653453SSatish Balay         col++;
2178016bdd1SSatish Balay       }
2188016bdd1SSatish Balay     }
219d9653453SSatish Balay   }
220d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
22157b952d6SSatish Balay   PetscFree(rvals);
2228016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
2238016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
2248016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
225d9653453SSatish Balay   baij->B = Bnew;
2268016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2273a40ed3dSBarry Smith   PetscFunctionReturn(0);
2288016bdd1SSatish Balay }
2298016bdd1SSatish Balay 
2308016bdd1SSatish Balay 
231