xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 0f5bd95cca42d693ada6d048329ab39533680180)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*0f5bd95cSBarry Smith static char vcid[] = "$Id: mmbaij.c,v 1.26 1999/06/30 23:51:57 balay Exp bsmith $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay /*
6d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
78016bdd1SSatish Balay */
870f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
98016bdd1SSatish Balay #include "src/vec/vecimpl.h"
108016bdd1SSatish Balay 
115615d1e5SSatish Balay #undef __FUNC__
125615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
13d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
148016bdd1SSatish Balay {
15d9653453SSatish Balay   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ *) mat->data;
16d9653453SSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ *) (baij->B->data);
17d9653453SSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
18537820f0SBarry Smith   int                col,bs = baij->bs,*tmp,*stmp;
198016bdd1SSatish Balay   IS                 from,to;
208016bdd1SSatish Balay   Vec                gvec;
21aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
22*0f5bd95cSBarry Smith   PetscTable         gid1_lid1;
23*0f5bd95cSBarry Smith   PetscTablePosition tpos;
2473a2e727SSatish Balay   int                gid, lid;
2573a2e727SSatish Balay #endif
268016bdd1SSatish Balay 
273a40ed3dSBarry Smith   PetscFunctionBegin;
2873a2e727SSatish Balay 
29aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3073a2e727SSatish Balay   /* use a table - Mark Adams */
31*0f5bd95cSBarry Smith   PetscTableCreate(B->mbs,&gid1_lid1);
3273a2e727SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
3373a2e727SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
34fa46199cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
35*0f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
36fa46199cSSatish Balay       if (!data) {
3773a2e727SSatish Balay         /* one based table */
38*0f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
3973a2e727SSatish Balay       }
4073a2e727SSatish Balay     }
4173a2e727SSatish Balay   }
4273a2e727SSatish Balay   /* form array of columns we need */
4373a2e727SSatish Balay   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
4473a2e727SSatish Balay   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
45*0f5bd95cSBarry Smith   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4673a2e727SSatish Balay   while (tpos) {
47*0f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4873a2e727SSatish Balay     gid--; lid--;
4973a2e727SSatish Balay     garray[lid] = gid;
5073a2e727SSatish Balay   }
510064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
520064e2bbSSatish Balay   /* qsort( garray, ec, sizeof(int), intcomparcarc ); */
53*0f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5473a2e727SSatish Balay   for ( i=0; i<ec; i++ ) {
55*0f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5673a2e727SSatish Balay   }
5773a2e727SSatish Balay   /* compact out the extra columns in B */
5873a2e727SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
5973a2e727SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
6073a2e727SSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
61*0f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
62fa46199cSSatish Balay       lid --;
6373a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6473a2e727SSatish Balay     }
6573a2e727SSatish Balay   }
6673a2e727SSatish Balay   B->nbs = ec;
6773a2e727SSatish Balay   B->n   = ec*B->bs;
68*0f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6973a2e727SSatish Balay   /* Mark Adams */
7073a2e727SSatish Balay #else
718016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
72d9653453SSatish Balay   /* mark those columns that are in baij->B */
739dbe0bc3SSatish Balay   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) );CHKPTRQ(indices);
74549d3d68SSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
75d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
768016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
77d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
78d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
798016bdd1SSatish Balay     }
808016bdd1SSatish Balay   }
818016bdd1SSatish Balay 
828016bdd1SSatish Balay   /* form array of columns we need */
838016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(garray);
84d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) );CHKPTRQ(tmp)
858016bdd1SSatish Balay   ec = 0;
86d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
870bdbc534SSatish Balay     if (indices[i]) {
880bdbc534SSatish Balay       garray[ec++] = i;
890bdbc534SSatish Balay     }
908016bdd1SSatish Balay   }
918016bdd1SSatish Balay 
928016bdd1SSatish Balay   /* make indices now point into garray */
938016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
94d9653453SSatish Balay     indices[garray[i]] = i;
958016bdd1SSatish Balay   }
968016bdd1SSatish Balay 
978016bdd1SSatish Balay   /* compact out the extra columns in B */
98d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
998016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
100d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
1018016bdd1SSatish Balay     }
1028016bdd1SSatish Balay   }
103d9653453SSatish Balay   B->nbs = ec;
104d9653453SSatish Balay   B->n   = ec*B->bs;
105606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10673a2e727SSatish Balay #endif
1078016bdd1SSatish Balay 
10857b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
10957b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
110d9653453SSatish Balay   }
1118016bdd1SSatish Balay   /* create local vector that is used to scatter into */
112029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1138016bdd1SSatish Balay 
114c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
115c16cb8f2SBarry Smith 
116029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
117c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
118c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
119c16cb8f2SBarry Smith   }
120029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
121c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
122c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
123c16cb8f2SBarry Smith   }
124c16cb8f2SBarry Smith 
125537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(stmp);
126537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
127029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
128606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1298016bdd1SSatish Balay 
1308016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1318016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1328016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1338016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
134d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
1358016bdd1SSatish Balay 
1368016bdd1SSatish Balay   /* gnerate the scatter context */
137d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13890f02eecSBarry Smith 
13990f02eecSBarry Smith   /*
14090f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
14190f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
14290f02eecSBarry Smith     the matrix.
14390f02eecSBarry Smith   */
14443a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
145ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14690f02eecSBarry Smith 
147d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
148d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
1498016bdd1SSatish Balay   PLogObjectParent(mat,from);
1508016bdd1SSatish Balay   PLogObjectParent(mat,to);
151d9653453SSatish Balay   baij->garray = garray;
1528016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1538016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1548016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
155888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
156606d414cSSatish Balay   ierr = PetscFree(tmp);CHKERRQ(ierr);
1573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1588016bdd1SSatish Balay }
1598016bdd1SSatish Balay 
1608016bdd1SSatish Balay 
1618016bdd1SSatish Balay /*
162d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1638016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1648016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1658016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1668016bdd1SSatish Balay 
1678016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1688016bdd1SSatish Balay    they are sloppy.
1698016bdd1SSatish Balay */
1705615d1e5SSatish Balay #undef __FUNC__
1715615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ"
172d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1738016bdd1SSatish Balay {
174d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
175d9653453SSatish Balay   Mat        B = baij->B,Bnew;
176d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
177d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
178d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
179d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1808016bdd1SSatish Balay 
1813a40ed3dSBarry Smith   PetscFunctionBegin;
1828016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
183888f2ed8SSatish Balay   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
184d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
185d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
186d9653453SSatish Balay   if (baij->colmap) {
187aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
188*0f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
18948e59246SSatish Balay #else
190606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
191606d414cSSatish Balay     baij->colmap = 0;
192d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
19348e59246SSatish Balay #endif
1948016bdd1SSatish Balay   }
1958016bdd1SSatish Balay 
1968016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1976d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1986d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998016bdd1SSatish Balay 
2008016bdd1SSatish Balay   /* invent new B and copy stuff over */
201d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(nz);
202d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
203d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2048016bdd1SSatish Balay   }
205029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
206606d414cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
207d9653453SSatish Balay 
208d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
209d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
210d9653453SSatish Balay     rvals[0] = bs*i;
211d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
212d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
21357b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
214d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
21583271157SBarry Smith         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr);
216d9653453SSatish Balay         col++;
2178016bdd1SSatish Balay       }
2188016bdd1SSatish Balay     }
219d9653453SSatish Balay   }
220606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
221606d414cSSatish Balay   baij->garray = 0;
222606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
2238016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
2248016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
2258016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
226d9653453SSatish Balay   baij->B = Bnew;
2278016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2283a40ed3dSBarry Smith   PetscFunctionReturn(0);
2298016bdd1SSatish Balay }
2308016bdd1SSatish Balay 
2318016bdd1SSatish Balay 
232