xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 026e39d04bcf2a445c0a9332c845c324154b2e8f)
1 #ifndef lint
2 static char vcid[] = "$Id: mmbaij.c,v 1.9 1996/11/27 22:53:55 bsmith Exp balay $";
3 #endif
4 
5 
6 /*
7    Support for the parallel BAIJ matrix vector multiply
8 */
9 #include "src/mat/impls/baij/mpi/mpibaij.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNCTION__
13 #define __FUNCTION__ "MatSetUpMultiply_MPIBAIJ"
14 int MatSetUpMultiply_MPIBAIJ(Mat mat)
15 {
16   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19   int        col,bs = baij->bs,*tmp,*stmp;
20   IS         from,to;
21   Vec        gvec;
22 
23   /* For the first stab we make an array as long as the number of columns */
24   /* mark those columns that are in baij->B */
25   indices = (int *) PetscMalloc( Nbs*sizeof(int) ); CHKPTRQ(indices);
26   PetscMemzero(indices,Nbs*sizeof(int));
27   for ( i=0; i<B->mbs; i++ ) {
28     for ( j=0; j<B->ilen[i]; j++ ) {
29       if (!indices[aj[B->i[i] + j]]) ec++;
30       indices[aj[B->i[i] + j] ] = 1;
31     }
32   }
33 
34   /* form array of columns we need */
35   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
36   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
37   ec = 0;
38   for ( i=0; i<Nbs; i++ ) {
39     if (indices[i]) garray[ec++] = i;
40   }
41 
42   /* make indices now point into garray */
43   for ( i=0; i<ec; i++ ) {
44     indices[garray[i]] = i;
45   }
46 
47   /* compact out the extra columns in B */
48   for ( i=0; i<B->mbs; i++ ) {
49     for ( j=0; j<B->ilen[i]; j++ ) {
50       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
51     }
52   }
53   B->nbs = ec;
54   B->n   = ec*B->bs;
55   PetscFree(indices);
56 
57   for ( i=0,col=0; i<ec; i++ ) {
58     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
59   }
60   /* create local vector that is used to scatter into */
61   ierr = VecCreateSeq(MPI_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
62 
63   /* create two temporary index sets for building scatter-gather */
64 
65   /* ierr = ISCreateGeneral(MPI_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
66   for ( i=0,col=0; i<ec; i++ ) {
67     garray[i] = bs*garray[i];
68   }
69   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
70   for ( i=0,col=0; i<ec; i++ ) {
71     garray[i] = garray[i]/bs;
72   }
73 
74   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
75   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
76   ierr = ISCreateBlock(MPI_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
77   PetscFree(stmp);
78 
79   /* create temporary global vector to generate scatter context */
80   /* this is inefficient, but otherwise we must do either
81      1) save garray until the first actual scatter when the vector is known or
82      2) have another way of generating a scatter context without a vector.*/
83   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
84 
85   /* gnerate the scatter context */
86   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
87 
88   /*
89       Post the receives for the first matrix vector product. We sync-chronize after
90     this on the chance that the user immediately calls MatMult() after assemblying
91     the matrix.
92   */
93   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
94   MPI_Barrier(mat->comm);
95 
96   PLogObjectParent(mat,baij->Mvctx);
97   PLogObjectParent(mat,baij->lvec);
98   PLogObjectParent(mat,from);
99   PLogObjectParent(mat,to);
100   baij->garray = garray;
101   PLogObjectMemory(mat,(ec+1)*sizeof(int));
102   ierr = ISDestroy(from); CHKERRQ(ierr);
103   ierr = ISDestroy(to); CHKERRQ(ierr);
104   ierr = VecDestroy(gvec);
105   PetscFree(tmp);
106   return 0;
107 }
108 
109 
110 /*
111      Takes the local part of an already assembled MPIBAIJ matrix
112    and disassembles it. This is to allow new nonzeros into the matrix
113    that require more communication in the matrix vector multiply.
114    Thus certain data-structures must be rebuilt.
115 
116    Kind of slow! But that's what application programmers get when
117    they are sloppy.
118 */
119 #undef __FUNCTION__
120 #define __FUNCTION__ "DisAssemble_MPIBAIJ"
121 int DisAssemble_MPIBAIJ(Mat A)
122 {
123   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
124   Mat        B = baij->B,Bnew;
125   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
126   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
127   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
128   Scalar     *a=Bbaij->a;
129 
130   /* free stuff related to matrix-vec multiply */
131   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
132   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
133   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
134   if (baij->colmap) {
135     PetscFree(baij->colmap); baij->colmap = 0;
136     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
137   }
138 
139   /* make sure that B is assembled so we can access its values */
140   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
141   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
142 
143   /* invent new B and copy stuff over */
144   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
145   for ( i=0; i<mbs; i++ ) {
146     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
147   }
148   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
149   PetscFree(nz);
150 
151   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
152   for ( i=0; i<mbs; i++ ) {
153     rvals[0] = bs*i;
154     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
155     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
156       col = garray[Bbaij->j[j]]*bs;
157       for (k=0; k<bs; k++ ) {
158         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
159         col++;
160       }
161     }
162   }
163   PetscFree(baij->garray); baij->garray = 0;
164   PetscFree(rvals);
165   PLogObjectMemory(A,-ec*sizeof(int));
166   ierr = MatDestroy(B); CHKERRQ(ierr);
167   PLogObjectParent(A,Bnew);
168   baij->B = Bnew;
169   A->was_assembled = PETSC_FALSE;
170   return 0;
171 }
172 
173 
174