xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision d9653453e4a6fdd381723605f9e4b3ffbc7c37c2)
18016bdd1SSatish Balay #ifndef lint
2*d9653453SSatish Balay static char vcid[] = "$Id: mmbaij.c,v 1.1 1996/06/17 22:34:33 balay Exp balay $";
38016bdd1SSatish Balay #endif
48016bdd1SSatish Balay 
58016bdd1SSatish Balay 
68016bdd1SSatish Balay /*
7*d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
88016bdd1SSatish Balay */
9*d9653453SSatish Balay #include "mpibaij.h"
108016bdd1SSatish Balay #include "src/vec/vecimpl.h"
11*d9653453SSatish Balay #include "../seq/baij.h"
128016bdd1SSatish Balay 
13*d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
148016bdd1SSatish Balay {
15*d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
16*d9653453SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
17*d9653453SSatish Balay   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
18*d9653453SSatish Balay   int        row,bs = baij->bs,mbs=B->mbs,*tmp;
198016bdd1SSatish Balay   IS         from,to;
208016bdd1SSatish Balay   Vec        gvec;
218016bdd1SSatish Balay 
228016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
23*d9653453SSatish Balay   /* mark those columns that are in baij->B */
24*d9653453SSatish Balay   indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices);
25*d9653453SSatish Balay   PetscMemzero(indices,Nbs*sizeof(int));
26*d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
278016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
28*d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
29*d9653453SSatish Balay       indices[aj[B->i[i] + j] ] = 1;
308016bdd1SSatish Balay     }
318016bdd1SSatish Balay   }
328016bdd1SSatish Balay 
338016bdd1SSatish Balay   /* form array of columns we need */
348016bdd1SSatish Balay   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
35*d9653453SSatish Balay   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
368016bdd1SSatish Balay   ec = 0;
37*d9653453SSatish Balay   for ( i=0; i<Nbs; i++ ) {
388016bdd1SSatish Balay     if (indices[i]) garray[ec++] = i;
398016bdd1SSatish Balay   }
408016bdd1SSatish Balay 
418016bdd1SSatish Balay   /* make indices now point into garray */
428016bdd1SSatish Balay   for ( i=0; i<ec; i++ ) {
43*d9653453SSatish Balay     indices[garray[i]] = i;
448016bdd1SSatish Balay   }
458016bdd1SSatish Balay 
468016bdd1SSatish Balay   /* compact out the extra columns in B */
47*d9653453SSatish Balay   for ( i=0; i<B->mbs; i++ ) {
488016bdd1SSatish Balay     for ( j=0; j<B->ilen[i]; j++ ) {
49*d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
508016bdd1SSatish Balay     }
518016bdd1SSatish Balay   }
52*d9653453SSatish Balay   B->nbs = ec;
53*d9653453SSatish Balay   B->n   = ec*B->bs;
548016bdd1SSatish Balay   PetscFree(indices);
558016bdd1SSatish Balay 
56*d9653453SSatish Balay   for ( i=0,row=0; i<mbs; i++ ) {
57*d9653453SSatish Balay     for ( j=0; j<bs; j++,row++) tmp[row] = garray[i]+j;
58*d9653453SSatish Balay   }
598016bdd1SSatish Balay   /* create local vector that is used to scatter into */
60*d9653453SSatish Balay   ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
618016bdd1SSatish Balay 
628016bdd1SSatish Balay   /* create two temporary Index sets for build scatter gather */
63*d9653453SSatish Balay   ierr = ISCreateSeq(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr);
64*d9653453SSatish Balay   ierr = ISCreateStrideSeq(MPI_COMM_SELF,ec*bs,0,1,&to); CHKERRQ(ierr);
658016bdd1SSatish Balay 
668016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
678016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
688016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
698016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
70*d9653453SSatish Balay   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
718016bdd1SSatish Balay 
728016bdd1SSatish Balay   /* gnerate the scatter context */
73*d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx); CHKERRQ(ierr);
74*d9653453SSatish Balay   PLogObjectParent(mat,baij->Mvctx);
75*d9653453SSatish Balay   PLogObjectParent(mat,baij->lvec);
768016bdd1SSatish Balay   PLogObjectParent(mat,from);
778016bdd1SSatish Balay   PLogObjectParent(mat,to);
78*d9653453SSatish Balay   baij->garray = garray;
798016bdd1SSatish Balay   PLogObjectMemory(mat,(ec+1)*sizeof(int));
808016bdd1SSatish Balay   ierr = ISDestroy(from); CHKERRQ(ierr);
818016bdd1SSatish Balay   ierr = ISDestroy(to); CHKERRQ(ierr);
828016bdd1SSatish Balay   ierr = VecDestroy(gvec);
83*d9653453SSatish Balay   PetscFree(tmp);
848016bdd1SSatish Balay   return 0;
858016bdd1SSatish Balay }
868016bdd1SSatish Balay 
878016bdd1SSatish Balay 
888016bdd1SSatish Balay /*
89*d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
908016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
918016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
928016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
938016bdd1SSatish Balay 
948016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
958016bdd1SSatish Balay    they are sloppy.
968016bdd1SSatish Balay */
97*d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
988016bdd1SSatish Balay {
99*d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
100*d9653453SSatish Balay   Mat        B = baij->B,Bnew;
101*d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
102*d9653453SSatish Balay   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
103*d9653453SSatish Balay   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
104*d9653453SSatish Balay   Scalar     *a=Bbaij->a;
1058016bdd1SSatish Balay 
1068016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
107*d9653453SSatish Balay   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
108*d9653453SSatish Balay   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
109*d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
110*d9653453SSatish Balay   if (baij->colmap) {
111*d9653453SSatish Balay     PetscFree(baij->colmap); baij->colmap = 0;
112*d9653453SSatish Balay     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
1138016bdd1SSatish Balay   }
1148016bdd1SSatish Balay 
1158016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1168016bdd1SSatish Balay   ierr = MatAssemblyBegin(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1178016bdd1SSatish Balay   MatAssemblyEnd(B,FINAL_ASSEMBLY); CHKERRQ(ierr);
1188016bdd1SSatish Balay 
1198016bdd1SSatish Balay   /* invent new B and copy stuff over */
120*d9653453SSatish Balay   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
121*d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
122*d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1238016bdd1SSatish Balay   }
124*d9653453SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
1258016bdd1SSatish Balay   PetscFree(nz);
126*d9653453SSatish Balay 
127*d9653453SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
128*d9653453SSatish Balay   for ( i=0; i<mbs; i++ ) {
129*d9653453SSatish Balay     rvals[0] = bs*i;
130*d9653453SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
131*d9653453SSatish Balay     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
132*d9653453SSatish Balay       col = garray[Bbaij->j[i]]*bs;
133*d9653453SSatish Balay       for (k=0; k<bs; k++ ) {
134*d9653453SSatish Balay         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
135*d9653453SSatish Balay         col++;
1368016bdd1SSatish Balay       }
1378016bdd1SSatish Balay     }
138*d9653453SSatish Balay   }
139*d9653453SSatish Balay   PetscFree(baij->garray); baij->garray = 0;
1408016bdd1SSatish Balay   PLogObjectMemory(A,-ec*sizeof(int));
1418016bdd1SSatish Balay   ierr = MatDestroy(B); CHKERRQ(ierr);
1428016bdd1SSatish Balay   PLogObjectParent(A,Bnew);
143*d9653453SSatish Balay   baij->B = Bnew;
1448016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1458016bdd1SSatish Balay   return 0;
1468016bdd1SSatish Balay }
1478016bdd1SSatish Balay 
1488016bdd1SSatish Balay 
149