xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision b2863d3a52358f7ee22ca3a1f84f250141973e94)
1*b2863d3aSBarry Smith /*$Id: mmbaij.c,v 1.30 2000/04/09 03:09:57 bsmith Exp bsmith $*/
28016bdd1SSatish Balay 
38016bdd1SSatish Balay /*
4d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
58016bdd1SSatish Balay */
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
78016bdd1SSatish Balay #include "src/vec/vecimpl.h"
88016bdd1SSatish Balay 
95615d1e5SSatish Balay #undef __FUNC__
10*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ"
11d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
128016bdd1SSatish Balay {
13d9653453SSatish Balay   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
14d9653453SSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
15d9653453SSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
16537820f0SBarry Smith   int                col,bs = baij->bs,*tmp,*stmp;
178016bdd1SSatish Balay   IS                 from,to;
188016bdd1SSatish Balay   Vec                gvec;
19aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
200f5bd95cSBarry Smith   PetscTable         gid1_lid1;
210f5bd95cSBarry Smith   PetscTablePosition tpos;
2273a2e727SSatish Balay   int                gid,lid;
2373a2e727SSatish Balay #endif
248016bdd1SSatish Balay 
253a40ed3dSBarry Smith   PetscFunctionBegin;
2673a2e727SSatish Balay 
27aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2873a2e727SSatish Balay   /* use a table - Mark Adams */
290f5bd95cSBarry Smith   PetscTableCreate(B->mbs,&gid1_lid1);
3073a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
3173a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
32fa46199cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
330f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
34fa46199cSSatish Balay       if (!data) {
3573a2e727SSatish Balay         /* one based table */
360f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
3773a2e727SSatish Balay       }
3873a2e727SSatish Balay     }
3973a2e727SSatish Balay   }
4073a2e727SSatish Balay   /* form array of columns we need */
4173a2e727SSatish Balay   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
4273a2e727SSatish Balay   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
430f5bd95cSBarry Smith   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4473a2e727SSatish Balay   while (tpos) {
450f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4673a2e727SSatish Balay     gid--; lid--;
4773a2e727SSatish Balay     garray[lid] = gid;
4873a2e727SSatish Balay   }
490064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
500064e2bbSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
510f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5273a2e727SSatish Balay   for (i=0; i<ec; i++) {
530f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5473a2e727SSatish Balay   }
5573a2e727SSatish Balay   /* compact out the extra columns in B */
5673a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
5773a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
5873a2e727SSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
590f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60fa46199cSSatish Balay       lid --;
6173a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6273a2e727SSatish Balay     }
6373a2e727SSatish Balay   }
6473a2e727SSatish Balay   B->nbs = ec;
6573a2e727SSatish Balay   B->n   = ec*B->bs;
660f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6773a2e727SSatish Balay   /* Mark Adams */
6873a2e727SSatish Balay #else
698016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
70d9653453SSatish Balay   /* mark those columns that are in baij->B */
719dbe0bc3SSatish Balay   indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices);
72549d3d68SSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
73d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
748016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
75d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
76d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
778016bdd1SSatish Balay     }
788016bdd1SSatish Balay   }
798016bdd1SSatish Balay 
808016bdd1SSatish Balay   /* form array of columns we need */
818016bdd1SSatish Balay   garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
82d9653453SSatish Balay   tmp    = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp)
838016bdd1SSatish Balay   ec = 0;
84d9653453SSatish Balay   for (i=0; i<Nbs; i++) {
850bdbc534SSatish Balay     if (indices[i]) {
860bdbc534SSatish Balay       garray[ec++] = i;
870bdbc534SSatish Balay     }
888016bdd1SSatish Balay   }
898016bdd1SSatish Balay 
908016bdd1SSatish Balay   /* make indices now point into garray */
918016bdd1SSatish Balay   for (i=0; i<ec; i++) {
92d9653453SSatish Balay     indices[garray[i]] = i;
938016bdd1SSatish Balay   }
948016bdd1SSatish Balay 
958016bdd1SSatish Balay   /* compact out the extra columns in B */
96d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
978016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
98d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
998016bdd1SSatish Balay     }
1008016bdd1SSatish Balay   }
101d9653453SSatish Balay   B->nbs = ec;
102d9653453SSatish Balay   B->n   = ec*B->bs;
103606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10473a2e727SSatish Balay #endif
1058016bdd1SSatish Balay 
10657b952d6SSatish Balay   for (i=0,col=0; i<ec; i++) {
10757b952d6SSatish Balay     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
108d9653453SSatish Balay   }
1098016bdd1SSatish Balay   /* create local vector that is used to scatter into */
110029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1118016bdd1SSatish Balay 
112c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
113c16cb8f2SBarry Smith 
114029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
115c16cb8f2SBarry Smith   for (i=0,col=0; i<ec; i++) {
116c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
117c16cb8f2SBarry Smith   }
118029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
119c16cb8f2SBarry Smith   for (i=0,col=0; i<ec; i++) {
120c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
121c16cb8f2SBarry Smith   }
122c16cb8f2SBarry Smith 
123537820f0SBarry Smith   stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp);
124537820f0SBarry Smith   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
125029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
126606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1278016bdd1SSatish Balay 
1288016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1298016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1308016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1318016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
132d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
1338016bdd1SSatish Balay 
1348016bdd1SSatish Balay   /* gnerate the scatter context */
135d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13690f02eecSBarry Smith 
13790f02eecSBarry Smith   /*
13890f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
13990f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
14090f02eecSBarry Smith     the matrix.
14190f02eecSBarry Smith   */
14243a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
143ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14490f02eecSBarry Smith 
145d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
146d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
1478016bdd1SSatish Balay   PLogObjectParent(mat,from);
1488016bdd1SSatish Balay   PLogObjectParent(mat,to);
149d9653453SSatish Balay   baij->garray = garray;
1508016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1518016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1528016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
153888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
154606d414cSSatish Balay   ierr = PetscFree(tmp);CHKERRQ(ierr);
1553a40ed3dSBarry Smith   PetscFunctionReturn(0);
1568016bdd1SSatish Balay }
1578016bdd1SSatish Balay 
1588016bdd1SSatish Balay 
1598016bdd1SSatish Balay /*
160d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1618016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1628016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1638016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1648016bdd1SSatish Balay 
1658016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1668016bdd1SSatish Balay    they are sloppy.
1678016bdd1SSatish Balay */
1685615d1e5SSatish Balay #undef __FUNC__
169*b2863d3aSBarry Smith #define  __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ"
170d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1718016bdd1SSatish Balay {
172d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
173d9653453SSatish Balay   Mat         B = baij->B,Bnew;
174d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
175d9653453SSatish Balay   int         ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
176d9653453SSatish Balay   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
1773eda8832SBarry Smith   MatScalar   *a = Bbaij->a;
1783eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
1793eda8832SBarry Smith   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
1803eda8832SBarry Smith   int         l;
1813eda8832SBarry Smith #endif
1828016bdd1SSatish Balay 
1833a40ed3dSBarry Smith   PetscFunctionBegin;
1848016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
185888f2ed8SSatish Balay   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
186d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
187d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
188d9653453SSatish Balay   if (baij->colmap) {
189aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
1900f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
19148e59246SSatish Balay #else
192606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
193606d414cSSatish Balay     baij->colmap = 0;
194d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
19548e59246SSatish Balay #endif
1968016bdd1SSatish Balay   }
1978016bdd1SSatish Balay 
1988016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1996d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2003eda8832SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018016bdd1SSatish Balay 
2028016bdd1SSatish Balay   /* invent new B and copy stuff over */
203d9653453SSatish Balay   nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz);
204d9653453SSatish Balay   for (i=0; i<mbs; i++) {
205d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2068016bdd1SSatish Balay   }
207029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
208606d414cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
209d9653453SSatish Balay 
210d9653453SSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
211d9653453SSatish Balay   for (i=0; i<mbs; i++) {
212d9653453SSatish Balay     rvals[0] = bs*i;
213d9653453SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
214d9653453SSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
21557b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
216d9653453SSatish Balay       for (k=0; k<bs; k++) {
2173eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2183eda8832SBarry Smith         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
2193eda8832SBarry Smith #else
2203eda8832SBarry Smith         atmp = a+j*bs2;
2213eda8832SBarry Smith #endif
2223eda8832SBarry Smith         ierr = MatSetValues(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
223d9653453SSatish Balay         col++;
2248016bdd1SSatish Balay       }
2258016bdd1SSatish Balay     }
226d9653453SSatish Balay   }
2273eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2283eda8832SBarry Smith   ierr = PetscFree(atmp);CHKERRQ(ierr);
2293eda8832SBarry Smith #endif
230606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
231606d414cSSatish Balay   baij->garray = 0;
232606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
2338016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
2348016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
2358016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
236d9653453SSatish Balay   baij->B = Bnew;
2378016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2383a40ed3dSBarry Smith   PetscFunctionReturn(0);
2398016bdd1SSatish Balay }
2408016bdd1SSatish Balay 
2418016bdd1SSatish Balay 
2423eda8832SBarry Smith 
243