xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 95d5f7c29374efcfd2ca44c2fe93981fbc2b4454)
1*95d5f7c2SBarry Smith /*$Id: mmbaij.c,v 1.31 2000/04/09 04:36:25 bsmith Exp bsmith $*/
28016bdd1SSatish Balay 
38016bdd1SSatish Balay /*
4d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
58016bdd1SSatish Balay */
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
78016bdd1SSatish Balay #include "src/vec/vecimpl.h"
8*95d5f7c2SBarry Smith extern int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
98016bdd1SSatish Balay 
105615d1e5SSatish Balay #undef __FUNC__
11b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ"
12d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
138016bdd1SSatish Balay {
14d9653453SSatish Balay   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15d9653453SSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16d9653453SSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17537820f0SBarry Smith   int                col,bs = baij->bs,*tmp,*stmp;
188016bdd1SSatish Balay   IS                 from,to;
198016bdd1SSatish Balay   Vec                gvec;
20aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
210f5bd95cSBarry Smith   PetscTable         gid1_lid1;
220f5bd95cSBarry Smith   PetscTablePosition tpos;
2373a2e727SSatish Balay   int                gid,lid;
2473a2e727SSatish Balay #endif
258016bdd1SSatish Balay 
263a40ed3dSBarry Smith   PetscFunctionBegin;
2773a2e727SSatish Balay 
28aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2973a2e727SSatish Balay   /* use a table - Mark Adams */
300f5bd95cSBarry Smith   PetscTableCreate(B->mbs,&gid1_lid1);
3173a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
3273a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
33fa46199cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
340f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35fa46199cSSatish Balay       if (!data) {
3673a2e727SSatish Balay         /* one based table */
370f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
3873a2e727SSatish Balay       }
3973a2e727SSatish Balay     }
4073a2e727SSatish Balay   }
4173a2e727SSatish Balay   /* form array of columns we need */
4273a2e727SSatish Balay   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
4373a2e727SSatish Balay   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
440f5bd95cSBarry Smith   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4573a2e727SSatish Balay   while (tpos) {
460f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4773a2e727SSatish Balay     gid--; lid--;
4873a2e727SSatish Balay     garray[lid] = gid;
4973a2e727SSatish Balay   }
500064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
510064e2bbSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
520f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5373a2e727SSatish Balay   for (i=0; i<ec; i++) {
540f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5573a2e727SSatish Balay   }
5673a2e727SSatish Balay   /* compact out the extra columns in B */
5773a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
5873a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
5973a2e727SSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
600f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61fa46199cSSatish Balay       lid --;
6273a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6373a2e727SSatish Balay     }
6473a2e727SSatish Balay   }
6573a2e727SSatish Balay   B->nbs = ec;
6673a2e727SSatish Balay   B->n   = ec*B->bs;
670f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6873a2e727SSatish Balay   /* Mark Adams */
6973a2e727SSatish Balay #else
708016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
71d9653453SSatish Balay   /* mark those columns that are in baij->B */
729dbe0bc3SSatish Balay   indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices);
73549d3d68SSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
758016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
76d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
77d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
788016bdd1SSatish Balay     }
798016bdd1SSatish Balay   }
808016bdd1SSatish Balay 
818016bdd1SSatish Balay   /* form array of columns we need */
828016bdd1SSatish Balay   garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
83d9653453SSatish Balay   tmp    = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp)
848016bdd1SSatish Balay   ec = 0;
85d9653453SSatish Balay   for (i=0; i<Nbs; i++) {
860bdbc534SSatish Balay     if (indices[i]) {
870bdbc534SSatish Balay       garray[ec++] = i;
880bdbc534SSatish Balay     }
898016bdd1SSatish Balay   }
908016bdd1SSatish Balay 
918016bdd1SSatish Balay   /* make indices now point into garray */
928016bdd1SSatish Balay   for (i=0; i<ec; i++) {
93d9653453SSatish Balay     indices[garray[i]] = i;
948016bdd1SSatish Balay   }
958016bdd1SSatish Balay 
968016bdd1SSatish Balay   /* compact out the extra columns in B */
97d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
988016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
99d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
1008016bdd1SSatish Balay     }
1018016bdd1SSatish Balay   }
102d9653453SSatish Balay   B->nbs = ec;
103d9653453SSatish Balay   B->n   = ec*B->bs;
104606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10573a2e727SSatish Balay #endif
1068016bdd1SSatish Balay 
10757b952d6SSatish Balay   for (i=0,col=0; i<ec; i++) {
10857b952d6SSatish Balay     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
109d9653453SSatish Balay   }
1108016bdd1SSatish Balay   /* create local vector that is used to scatter into */
111029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1128016bdd1SSatish Balay 
113c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
114c16cb8f2SBarry Smith 
115029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
116c16cb8f2SBarry Smith   for (i=0,col=0; i<ec; i++) {
117c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
118c16cb8f2SBarry Smith   }
119029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
120c16cb8f2SBarry Smith   for (i=0,col=0; i<ec; i++) {
121c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
122c16cb8f2SBarry Smith   }
123c16cb8f2SBarry Smith 
124537820f0SBarry Smith   stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp);
125537820f0SBarry Smith   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
126029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
127606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1288016bdd1SSatish Balay 
1298016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1308016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1318016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1328016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
133d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
1348016bdd1SSatish Balay 
1358016bdd1SSatish Balay   /* gnerate the scatter context */
136d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13790f02eecSBarry Smith 
13890f02eecSBarry Smith   /*
13990f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
14090f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
14190f02eecSBarry Smith     the matrix.
14290f02eecSBarry Smith   */
14343a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
144ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14590f02eecSBarry Smith 
146d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
147d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
1488016bdd1SSatish Balay   PLogObjectParent(mat,from);
1498016bdd1SSatish Balay   PLogObjectParent(mat,to);
150d9653453SSatish Balay   baij->garray = garray;
1518016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1528016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1538016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
154888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
155606d414cSSatish Balay   ierr = PetscFree(tmp);CHKERRQ(ierr);
1563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1578016bdd1SSatish Balay }
1588016bdd1SSatish Balay 
1598016bdd1SSatish Balay 
1608016bdd1SSatish Balay /*
161d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1628016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1638016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1648016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1658016bdd1SSatish Balay 
1668016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1678016bdd1SSatish Balay    they are sloppy.
1688016bdd1SSatish Balay */
1695615d1e5SSatish Balay #undef __FUNC__
170b2863d3aSBarry Smith #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ"
171d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1728016bdd1SSatish Balay {
173d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
174d9653453SSatish Balay   Mat         B = baij->B,Bnew;
175d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
176d9653453SSatish Balay   int         ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
177d9653453SSatish Balay   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
1783eda8832SBarry Smith   MatScalar   *a = Bbaij->a;
1793eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
1803eda8832SBarry Smith   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
1813eda8832SBarry Smith   int         l;
182*95d5f7c2SBarry Smith #else
183*95d5f7c2SBarry Smith   Scalar      *atmp;
1843eda8832SBarry Smith #endif
1858016bdd1SSatish Balay 
1863a40ed3dSBarry Smith   PetscFunctionBegin;
1878016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
188888f2ed8SSatish Balay   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
189d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
190d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
191d9653453SSatish Balay   if (baij->colmap) {
192aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
1930f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
19448e59246SSatish Balay #else
195606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
196606d414cSSatish Balay     baij->colmap = 0;
197d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
19848e59246SSatish Balay #endif
1998016bdd1SSatish Balay   }
2008016bdd1SSatish Balay 
2018016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
2026d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033eda8832SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2048016bdd1SSatish Balay 
2058016bdd1SSatish Balay   /* invent new B and copy stuff over */
206d9653453SSatish Balay   nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz);
207d9653453SSatish Balay   for (i=0; i<mbs; i++) {
208d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2098016bdd1SSatish Balay   }
210029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
211606d414cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
212d9653453SSatish Balay 
213d9653453SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
214d9653453SSatish Balay   for (i=0; i<mbs; i++) {
215d9653453SSatish Balay     rvals[0] = bs*i;
216d9653453SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
217d9653453SSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
21857b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
219d9653453SSatish Balay       for (k=0; k<bs; k++) {
2203eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2213eda8832SBarry Smith         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
2223eda8832SBarry Smith #else
2233eda8832SBarry Smith         atmp = a+j*bs2;
2243eda8832SBarry Smith #endif
225*95d5f7c2SBarry Smith         ierr = MatSetValues_SeqBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
226d9653453SSatish Balay         col++;
2278016bdd1SSatish Balay       }
2288016bdd1SSatish Balay     }
229d9653453SSatish Balay   }
2303eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2313eda8832SBarry Smith   ierr = PetscFree(atmp);CHKERRQ(ierr);
2323eda8832SBarry Smith #endif
233606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
234606d414cSSatish Balay   baij->garray = 0;
235606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
2368016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
2378016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
2388016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
239d9653453SSatish Balay   baij->B = Bnew;
2408016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2413a40ed3dSBarry Smith   PetscFunctionReturn(0);
2428016bdd1SSatish Balay }
2438016bdd1SSatish Balay 
2448016bdd1SSatish Balay 
2453eda8832SBarry Smith 
246