xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision ac2a4f0d24b3b6a4ee93edbcad41f4bb9e923944)
1 /*$Id: mmbaij.c,v 1.27 1999/10/13 20:37:30 bsmith Exp bsmith $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 #undef __FUNC__
10 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
11 int MatSetUpMultiply_MPIBAIJ(Mat mat)
12 {
13   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ *) mat->data;
14   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ *) (baij->B->data);
15   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
16   int                col,bs = baij->bs,*tmp,*stmp;
17   IS                 from,to;
18   Vec                gvec;
19 #if defined (PETSC_USE_CTABLE)
20   PetscTable         gid1_lid1;
21   PetscTablePosition tpos;
22   int                gid, lid;
23 #endif
24 
25   PetscFunctionBegin;
26 
27 #if defined (PETSC_USE_CTABLE)
28   /* use a table - Mark Adams */
29   PetscTableCreate(B->mbs,&gid1_lid1);
30   for ( i=0; i<B->mbs; i++ ) {
31     for ( j=0; j<B->ilen[i]; j++ ) {
32       int data,gid1 = aj[B->i[i]+j] + 1;
33       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
34       if (!data) {
35         /* one based table */
36         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
37       }
38     }
39   }
40   /* form array of columns we need */
41   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
42   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
43   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44   while (tpos) {
45     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46     gid--; lid--;
47     garray[lid] = gid;
48   }
49   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
50   /* qsort( garray, ec, sizeof(int), intcomparcarc ); */
51   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
52   for ( i=0; i<ec; i++ ) {
53     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
54   }
55   /* compact out the extra columns in B */
56   for ( i=0; i<B->mbs; i++ ) {
57     for ( j=0; j<B->ilen[i]; j++ ) {
58       int gid1 = aj[B->i[i] + j] + 1;
59       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60       lid --;
61       aj[B->i[i]+j] = lid;
62     }
63   }
64   B->nbs = ec;
65   B->n   = ec*B->bs;
66   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
67   /* Mark Adams */
68 #else
69   /* For the first stab we make an array as long as the number of columns */
70   /* mark those columns that are in baij->B */
71   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) );CHKPTRQ(indices);
72   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
73   for ( i=0; i<B->mbs; i++ ) {
74     for ( j=0; j<B->ilen[i]; j++ ) {
75       if (!indices[aj[B->i[i] + j]]) ec++;
76       indices[aj[B->i[i] + j] ] = 1;
77     }
78   }
79 
80   /* form array of columns we need */
81   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(garray);
82   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) );CHKPTRQ(tmp)
83   ec = 0;
84   for ( i=0; i<Nbs; i++ ) {
85     if (indices[i]) {
86       garray[ec++] = i;
87     }
88   }
89 
90   /* make indices now point into garray */
91   for ( i=0; i<ec; i++ ) {
92     indices[garray[i]] = i;
93   }
94 
95   /* compact out the extra columns in B */
96   for ( i=0; i<B->mbs; i++ ) {
97     for ( j=0; j<B->ilen[i]; j++ ) {
98       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
99     }
100   }
101   B->nbs = ec;
102   B->n   = ec*B->bs;
103   ierr = PetscFree(indices);CHKERRQ(ierr);
104 #endif
105 
106   for ( i=0,col=0; i<ec; i++ ) {
107     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
108   }
109   /* create local vector that is used to scatter into */
110   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
111 
112   /* create two temporary index sets for building scatter-gather */
113 
114   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
115   for ( i=0,col=0; i<ec; i++ ) {
116     garray[i] = bs*garray[i];
117   }
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
119   for ( i=0,col=0; i<ec; i++ ) {
120     garray[i] = garray[i]/bs;
121   }
122 
123   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(stmp);
124   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
125   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
126   ierr = PetscFree(stmp);CHKERRQ(ierr);
127 
128   /* create temporary global vector to generate scatter context */
129   /* this is inefficient, but otherwise we must do either
130      1) save garray until the first actual scatter when the vector is known or
131      2) have another way of generating a scatter context without a vector.*/
132   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
133 
134   /* gnerate the scatter context */
135   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
136 
137   /*
138       Post the receives for the first matrix vector product. We sync-chronize after
139     this on the chance that the user immediately calls MatMult() after assemblying
140     the matrix.
141   */
142   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
143   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
144 
145   PLogObjectParent(mat,baij->Mvctx);
146   PLogObjectParent(mat,baij->lvec);
147   PLogObjectParent(mat,from);
148   PLogObjectParent(mat,to);
149   baij->garray = garray;
150   PLogObjectMemory(mat,(ec+1)*sizeof(int));
151   ierr = ISDestroy(from);CHKERRQ(ierr);
152   ierr = ISDestroy(to);CHKERRQ(ierr);
153   ierr = VecDestroy(gvec);CHKERRQ(ierr);
154   ierr = PetscFree(tmp);CHKERRQ(ierr);
155   PetscFunctionReturn(0);
156 }
157 
158 
159 /*
160      Takes the local part of an already assembled MPIBAIJ matrix
161    and disassembles it. This is to allow new nonzeros into the matrix
162    that require more communication in the matrix vector multiply.
163    Thus certain data-structures must be rebuilt.
164 
165    Kind of slow! But that's what application programmers get when
166    they are sloppy.
167 */
168 #undef __FUNC__
169 #define __FUNC__ "DisAssemble_MPIBAIJ"
170 int DisAssemble_MPIBAIJ(Mat A)
171 {
172   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
173   Mat        B = baij->B,Bnew;
174   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
175   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
176   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
177   Scalar     *a=Bbaij->a;
178 
179   PetscFunctionBegin;
180   /* free stuff related to matrix-vec multiply */
181   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
182   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
183   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
184   if (baij->colmap) {
185 #if defined (PETSC_USE_CTABLE)
186     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
187 #else
188     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
189     baij->colmap = 0;
190     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
191 #endif
192   }
193 
194   /* make sure that B is assembled so we can access its values */
195   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197 
198   /* invent new B and copy stuff over */
199   nz = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(nz);
200   for ( i=0; i<mbs; i++ ) {
201     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
202   }
203   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
204   ierr = PetscFree(nz);CHKERRQ(ierr);
205 
206   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
207   for ( i=0; i<mbs; i++ ) {
208     rvals[0] = bs*i;
209     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
210     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
211       col = garray[Bbaij->j[j]]*bs;
212       for (k=0; k<bs; k++ ) {
213         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr);
214         col++;
215       }
216     }
217   }
218   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
219   baij->garray = 0;
220   ierr = PetscFree(rvals);CHKERRQ(ierr);
221   PLogObjectMemory(A,-ec*sizeof(int));
222   ierr = MatDestroy(B);CHKERRQ(ierr);
223   PLogObjectParent(A,Bnew);
224   baij->B = Bnew;
225   A->was_assembled = PETSC_FALSE;
226   PetscFunctionReturn(0);
227 }
228 
229 
230