xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 48e59246f5401e5a7adbba88193a2a53d0c4f8b6)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*48e59246SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.17 1998/01/10 05:12:13 balay Exp balay $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
970f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
118016bdd1SSatish Balay 
125615d1e5SSatish Balay #undef __FUNC__
135615d1e5SSatish Balay #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
158016bdd1SSatish Balay {
16d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19537820f0SBarry Smith   int        col,bs = baij->bs,*tmp,*stmp;
208016bdd1SSatish Balay   IS         from,to;
218016bdd1SSatish Balay   Vec        gvec;
228016bdd1SSatish Balay 
233a40ed3dSBarry Smith   PetscFunctionBegin;
248016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
25d9653453SSatish Balay   /* mark those columns that are in baij->B */
269dbe0bc3SSatish Balay   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
27d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
28d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
298016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
30d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
31d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
328016bdd1SSatish Balay     }
338016bdd1SSatish Balay   }
348016bdd1SSatish Balay 
358016bdd1SSatish Balay   /* form array of columns we need */
368016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
37d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
388016bdd1SSatish Balay   ec = 0;
39d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
400bdbc534SSatish Balay     if (indices[i]) {
410bdbc534SSatish Balay       garray[ec++] = i;
420bdbc534SSatish Balay     }
438016bdd1SSatish Balay   }
448016bdd1SSatish Balay 
458016bdd1SSatish Balay   /* make indices now point into garray */
468016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
47d9653453SSatish Balay     indices[garray[i]] = i;
488016bdd1SSatish Balay   }
498016bdd1SSatish Balay 
508016bdd1SSatish Balay   /* compact out the extra columns in B */
51d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
528016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
53d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
548016bdd1SSatish Balay     }
558016bdd1SSatish Balay   }
56d9653453SSatish Balay   B->nbs = ec;
57d9653453SSatish Balay   B->n   = ec*B->bs;
588016bdd1SSatish Balay   PetscFree(indices);
598016bdd1SSatish Balay 
6057b952d6SSatish Balay   for ( i=0,col=0; i<ec; i++ ) {
6157b952d6SSatish Balay     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
62d9653453SSatish Balay   }
638016bdd1SSatish Balay   /* create local vector that is used to scatter into */
64029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
658016bdd1SSatish Balay 
66c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
67c16cb8f2SBarry Smith 
68029af93fSBarry Smith   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
69c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
70c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
71c16cb8f2SBarry Smith   }
72029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
73c16cb8f2SBarry Smith   for ( i=0,col=0; i<ec; i++ ) {
74c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
75c16cb8f2SBarry Smith   }
76c16cb8f2SBarry Smith 
77537820f0SBarry Smith   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
78537820f0SBarry Smith   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
79029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
80537820f0SBarry Smith   PetscFree(stmp);
818016bdd1SSatish Balay 
828016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
838016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
848016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
858016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
86d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
878016bdd1SSatish Balay 
888016bdd1SSatish Balay   /* gnerate the scatter context */
89d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
9090f02eecSBarry Smith 
9190f02eecSBarry Smith   /*
9290f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
9390f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
9490f02eecSBarry Smith     the matrix.
9590f02eecSBarry Smith   */
9643a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
97ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
9890f02eecSBarry Smith 
99d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
100d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
1018016bdd1SSatish Balay   PLogObjectParent(mat,from);
1028016bdd1SSatish Balay   PLogObjectParent(mat,to);
103d9653453SSatish Balay   baij->garray = garray;
1048016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
1058016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
1068016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
1078016bdd1SSatish Balay   ierr = VecDestroy(gvec);
108d9653453SSatish Balay   PetscFree(tmp);
1093a40ed3dSBarry Smith   PetscFunctionReturn(0);
1108016bdd1SSatish Balay }
1118016bdd1SSatish Balay 
1128016bdd1SSatish Balay 
1138016bdd1SSatish Balay /*
114d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1158016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1168016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1178016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1188016bdd1SSatish Balay 
1198016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1208016bdd1SSatish Balay    they are sloppy.
1218016bdd1SSatish Balay */
1225615d1e5SSatish Balay #undef __FUNC__
1235615d1e5SSatish Balay #define __FUNC__ "DisAssemble_MPIBAIJ"
124d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1258016bdd1SSatish Balay {
126d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
127d9653453SSatish Balay   Mat        B = baij->B,Bnew;
128d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
129d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
130d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
131d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1328016bdd1SSatish Balay 
1333a40ed3dSBarry Smith   PetscFunctionBegin;
1348016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
135d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
136d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
137d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
138d9653453SSatish Balay   if (baij->colmap) {
139*48e59246SSatish Balay #if defined (USE_CTABLE)
140*48e59246SSatish Balay     TableDelete(baij->colmap); baij->colmap = 0;
141*48e59246SSatish Balay #else
142d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
143d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
144*48e59246SSatish Balay #endif
1458016bdd1SSatish Balay   }
1468016bdd1SSatish Balay 
1478016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1486d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1496d4a8577SBarry Smith   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1508016bdd1SSatish Balay 
1518016bdd1SSatish Balay   /* invent new B and copy stuff over */
152d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
153d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
154d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1558016bdd1SSatish Balay   }
156029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1578016bdd1SSatish Balay   PetscFree(nz);
158d9653453SSatish Balay 
159d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
160d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
161d9653453SSatish Balay     rvals[0] = bs*i;
162d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
163d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
16457b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
165d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
166d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
167d9653453SSatish Balay         col++;
1688016bdd1SSatish Balay       }
1698016bdd1SSatish Balay     }
170d9653453SSatish Balay   }
171d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
17257b952d6SSatish Balay   PetscFree(rvals);
1738016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1748016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1758016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
176d9653453SSatish Balay   baij->B = Bnew;
1778016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1783a40ed3dSBarry Smith   PetscFunctionReturn(0);
1798016bdd1SSatish Balay }
1808016bdd1SSatish Balay 
1818016bdd1SSatish Balay 
182