xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision fa46199cd6736f835a9dfd5fb06fcbb3763bab4e)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mmbaij.c,v 1.21 1999/01/31 16:07:13 bsmith Exp balay $";
3 #endif
4 
5 
6 /*
7    Support for the parallel BAIJ matrix vector multiply
8 */
9 #include "src/mat/impls/baij/mpi/mpibaij.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14 int MatSetUpMultiply_MPIBAIJ(Mat mat)
15 {
16   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19   int        col,bs = baij->bs,*tmp,*stmp;
20   IS         from,to;
21   Vec        gvec;
22 #if defined (USE_CTABLE)
23   Table      gid1_lid1;
24   CTablePos  tpos;
25   int        gid, lid;
26 #endif
27 
28   PetscFunctionBegin;
29 
30 #if defined (USE_CTABLE)
31   /* use a table - Mark Adams */
32   TableCreate(B->mbs,&gid1_lid1);
33   for ( i=0; i<B->mbs; i++ ) {
34     for ( j=0; j<B->ilen[i]; j++ ) {
35       int data,gid1 = aj[B->i[i]+j] + 1;
36       ierr = TableFind(gid1_lid1,gid1,&data) ; CHKERRQ(ierr);
37       if (!data) {
38         /* one based table */
39         ierr = TableAdd(gid1_lid1,gid1,++ec); CHKERRQ(ierr);
40       }
41     }
42   }
43   /* form array of columns we need */
44   garray = (int *)PetscMalloc((ec+1)*sizeof(int)); CHKPTRQ(garray);
45   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int)); CHKPTRQ(tmp);
46   ierr   = TableGetHeadPosition(gid1_lid1,&tpos); CHKERRQ(ierr);
47   while (tpos) {
48     ierr = TableGetNext(gid1_lid1,&tpos,&gid,&lid); CHKERRQ(ierr);
49     gid--; lid--;
50     garray[lid] = gid;
51   }
52   ierr = PetscSortInt(ec,garray); CHKERRQ(ierr);
53   /* qsort( garray, ec, sizeof(int), intcomparcarc ); */
54   TableRemoveAll(gid1_lid1);
55   for ( i=0; i<ec; i++ ) {
56     ierr = TableAdd(gid1_lid1,garray[i]+1,i+1); CHKERRQ(ierr);
57   }
58   /* compact out the extra columns in B */
59   for ( i=0; i<B->mbs; i++ ) {
60     for ( j=0; j<B->ilen[i]; j++ ) {
61       int gid1 = aj[B->i[i] + j] + 1;
62       ierr = TableFind(gid1_lid1,gid1,&lid); CHKERRQ(ierr);
63       lid --;
64       aj[B->i[i]+j] = lid;
65     }
66   }
67   B->nbs = ec;
68   B->n   = ec*B->bs;
69   TableDelete(gid1_lid1);
70   /* Mark Adams */
71 #else
72   /* For the first stab we make an array as long as the number of columns */
73   /* mark those columns that are in baij->B */
74   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
75   PetscMemzero(indices,Nbs*sizeof(int));
76   for ( i=0; i<B->mbs; i++ ) {
77     for ( j=0; j<B->ilen[i]; j++ ) {
78       if (!indices[aj[B->i[i] + j]]) ec++;
79       indices[aj[B->i[i] + j] ] = 1;
80     }
81   }
82 
83   /* form array of columns we need */
84   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
85   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
86   ec = 0;
87   for ( i=0; i<Nbs; i++ ) {
88     if (indices[i]) {
89       garray[ec++] = i;
90     }
91   }
92 
93   /* make indices now point into garray */
94   for ( i=0; i<ec; i++ ) {
95     indices[garray[i]] = i;
96   }
97 
98   /* compact out the extra columns in B */
99   for ( i=0; i<B->mbs; i++ ) {
100     for ( j=0; j<B->ilen[i]; j++ ) {
101       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
102     }
103   }
104   B->nbs = ec;
105   B->n   = ec*B->bs;
106   PetscFree(indices);
107 #endif
108 
109   for ( i=0,col=0; i<ec; i++ ) {
110     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
111   }
112   /* create local vector that is used to scatter into */
113   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
114 
115   /* create two temporary index sets for building scatter-gather */
116 
117   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
118   for ( i=0,col=0; i<ec; i++ ) {
119     garray[i] = bs*garray[i];
120   }
121   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
122   for ( i=0,col=0; i<ec; i++ ) {
123     garray[i] = garray[i]/bs;
124   }
125 
126   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
127   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
128   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
129   PetscFree(stmp);
130 
131   /* create temporary global vector to generate scatter context */
132   /* this is inefficient, but otherwise we must do either
133      1) save garray until the first actual scatter when the vector is known or
134      2) have another way of generating a scatter context without a vector.*/
135   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
136 
137   /* gnerate the scatter context */
138   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
139 
140   /*
141       Post the receives for the first matrix vector product. We sync-chronize after
142     this on the chance that the user immediately calls MatMult() after assemblying
143     the matrix.
144   */
145   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
146   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
147 
148   PLogObjectParent(mat,baij->Mvctx);
149   PLogObjectParent(mat,baij->lvec);
150   PLogObjectParent(mat,from);
151   PLogObjectParent(mat,to);
152   baij->garray = garray;
153   PLogObjectMemory(mat,(ec+1)*sizeof(int));
154   ierr = ISDestroy(from); CHKERRQ(ierr);
155   ierr = ISDestroy(to); CHKERRQ(ierr);
156   ierr = VecDestroy(gvec);
157   PetscFree(tmp);
158   PetscFunctionReturn(0);
159 }
160 
161 
162 /*
163      Takes the local part of an already assembled MPIBAIJ matrix
164    and disassembles it. This is to allow new nonzeros into the matrix
165    that require more communication in the matrix vector multiply.
166    Thus certain data-structures must be rebuilt.
167 
168    Kind of slow! But that's what application programmers get when
169    they are sloppy.
170 */
171 #undef __FUNC__
172 #define __FUNC__ "DisAssemble_MPIBAIJ"
173 int DisAssemble_MPIBAIJ(Mat A)
174 {
175   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
176   Mat        B = baij->B,Bnew;
177   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
178   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
179   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
180   Scalar     *a=Bbaij->a;
181 
182   PetscFunctionBegin;
183   /* free stuff related to matrix-vec multiply */
184   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
185   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
186   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
187   if (baij->colmap) {
188 #if defined (USE_CTABLE)
189     TableDelete(baij->colmap); baij->colmap = 0;
190 #else
191     PetscFree(baij->colmap); baij->colmap = 0;
192     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
193 #endif
194   }
195 
196   /* make sure that B is assembled so we can access its values */
197   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
198   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
199 
200   /* invent new B and copy stuff over */
201   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
202   for ( i=0; i<mbs; i++ ) {
203     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
204   }
205   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
206   PetscFree(nz);
207 
208   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
209   for ( i=0; i<mbs; i++ ) {
210     rvals[0] = bs*i;
211     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
212     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
213       col = garray[Bbaij->j[j]]*bs;
214       for (k=0; k<bs; k++ ) {
215         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr);
216         col++;
217       }
218     }
219   }
220   PetscFree(baij->garray); baij->garray = 0;
221   PetscFree(rvals);
222   PLogObjectMemory(A,-ec*sizeof(int));
223   ierr = MatDestroy(B); CHKERRQ(ierr);
224   PLogObjectParent(A,Bnew);
225   baij->B = Bnew;
226   A->was_assembled = PETSC_FALSE;
227   PetscFunctionReturn(0);
228 }
229 
230 
231