xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision b2863d3a52358f7ee22ca3a1f84f250141973e94)
1 /*$Id: mmbaij.c,v 1.30 2000/04/09 03:09:57 bsmith Exp bsmith $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 #undef __FUNC__
10 #define  __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ"
11 int MatSetUpMultiply_MPIBAIJ(Mat mat)
12 {
13   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
14   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
15   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
16   int                col,bs = baij->bs,*tmp,*stmp;
17   IS                 from,to;
18   Vec                gvec;
19 #if defined (PETSC_USE_CTABLE)
20   PetscTable         gid1_lid1;
21   PetscTablePosition tpos;
22   int                gid,lid;
23 #endif
24 
25   PetscFunctionBegin;
26 
27 #if defined (PETSC_USE_CTABLE)
28   /* use a table - Mark Adams */
29   PetscTableCreate(B->mbs,&gid1_lid1);
30   for (i=0; i<B->mbs; i++) {
31     for (j=0; j<B->ilen[i]; j++) {
32       int data,gid1 = aj[B->i[i]+j] + 1;
33       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
34       if (!data) {
35         /* one based table */
36         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
37       }
38     }
39   }
40   /* form array of columns we need */
41   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
42   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
43   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44   while (tpos) {
45     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46     gid--; lid--;
47     garray[lid] = gid;
48   }
49   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
50   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
51   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
52   for (i=0; i<ec; i++) {
53     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
54   }
55   /* compact out the extra columns in B */
56   for (i=0; i<B->mbs; i++) {
57     for (j=0; j<B->ilen[i]; j++) {
58       int gid1 = aj[B->i[i] + j] + 1;
59       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60       lid --;
61       aj[B->i[i]+j] = lid;
62     }
63   }
64   B->nbs = ec;
65   B->n   = ec*B->bs;
66   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
67   /* Mark Adams */
68 #else
69   /* For the first stab we make an array as long as the number of columns */
70   /* mark those columns that are in baij->B */
71   indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices);
72   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
73   for (i=0; i<B->mbs; i++) {
74     for (j=0; j<B->ilen[i]; j++) {
75       if (!indices[aj[B->i[i] + j]]) ec++;
76       indices[aj[B->i[i] + j] ] = 1;
77     }
78   }
79 
80   /* form array of columns we need */
81   garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
82   tmp    = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp)
83   ec = 0;
84   for (i=0; i<Nbs; i++) {
85     if (indices[i]) {
86       garray[ec++] = i;
87     }
88   }
89 
90   /* make indices now point into garray */
91   for (i=0; i<ec; i++) {
92     indices[garray[i]] = i;
93   }
94 
95   /* compact out the extra columns in B */
96   for (i=0; i<B->mbs; i++) {
97     for (j=0; j<B->ilen[i]; j++) {
98       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
99     }
100   }
101   B->nbs = ec;
102   B->n   = ec*B->bs;
103   ierr = PetscFree(indices);CHKERRQ(ierr);
104 #endif
105 
106   for (i=0,col=0; i<ec; i++) {
107     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
108   }
109   /* create local vector that is used to scatter into */
110   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
111 
112   /* create two temporary index sets for building scatter-gather */
113 
114   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
115   for (i=0,col=0; i<ec; i++) {
116     garray[i] = bs*garray[i];
117   }
118   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
119   for (i=0,col=0; i<ec; i++) {
120     garray[i] = garray[i]/bs;
121   }
122 
123   stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp);
124   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
125   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
126   ierr = PetscFree(stmp);CHKERRQ(ierr);
127 
128   /* create temporary global vector to generate scatter context */
129   /* this is inefficient, but otherwise we must do either
130      1) save garray until the first actual scatter when the vector is known or
131      2) have another way of generating a scatter context without a vector.*/
132   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
133 
134   /* gnerate the scatter context */
135   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
136 
137   /*
138       Post the receives for the first matrix vector product. We sync-chronize after
139     this on the chance that the user immediately calls MatMult() after assemblying
140     the matrix.
141   */
142   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
143   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
144 
145   PLogObjectParent(mat,baij->Mvctx);
146   PLogObjectParent(mat,baij->lvec);
147   PLogObjectParent(mat,from);
148   PLogObjectParent(mat,to);
149   baij->garray = garray;
150   PLogObjectMemory(mat,(ec+1)*sizeof(int));
151   ierr = ISDestroy(from);CHKERRQ(ierr);
152   ierr = ISDestroy(to);CHKERRQ(ierr);
153   ierr = VecDestroy(gvec);CHKERRQ(ierr);
154   ierr = PetscFree(tmp);CHKERRQ(ierr);
155   PetscFunctionReturn(0);
156 }
157 
158 
159 /*
160      Takes the local part of an already assembled MPIBAIJ matrix
161    and disassembles it. This is to allow new nonzeros into the matrix
162    that require more communication in the matrix vector multiply.
163    Thus certain data-structures must be rebuilt.
164 
165    Kind of slow! But that's what application programmers get when
166    they are sloppy.
167 */
168 #undef __FUNC__
169 #define  __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ"
170 int DisAssemble_MPIBAIJ(Mat A)
171 {
172   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
173   Mat         B = baij->B,Bnew;
174   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
175   int         ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
176   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
177   MatScalar   *a = Bbaij->a;
178 #if defined(PETSC_USE_MAT_SINGLE)
179   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
180   int         l;
181 #endif
182 
183   PetscFunctionBegin;
184   /* free stuff related to matrix-vec multiply */
185   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
186   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
187   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
188   if (baij->colmap) {
189 #if defined (PETSC_USE_CTABLE)
190     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
191 #else
192     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
193     baij->colmap = 0;
194     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
195 #endif
196   }
197 
198   /* make sure that B is assembled so we can access its values */
199   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
200   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
201 
202   /* invent new B and copy stuff over */
203   nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz);
204   for (i=0; i<mbs; i++) {
205     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
206   }
207   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
208   ierr = PetscFree(nz);CHKERRQ(ierr);
209 
210   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
211   for (i=0; i<mbs; i++) {
212     rvals[0] = bs*i;
213     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
214     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
215       col = garray[Bbaij->j[j]]*bs;
216       for (k=0; k<bs; k++) {
217 #if defined(PETSC_USE_MAT_SINGLE)
218         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
219 #else
220         atmp = a+j*bs2;
221 #endif
222         ierr = MatSetValues(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
223         col++;
224       }
225     }
226   }
227 #if defined(PETSC_USE_MAT_SINGLE)
228   ierr = PetscFree(atmp);CHKERRQ(ierr);
229 #endif
230   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
231   baij->garray = 0;
232   ierr = PetscFree(rvals);CHKERRQ(ierr);
233   PLogObjectMemory(A,-ec*sizeof(int));
234   ierr = MatDestroy(B);CHKERRQ(ierr);
235   PLogObjectParent(A,Bnew);
236   baij->B = Bnew;
237   A->was_assembled = PETSC_FALSE;
238   PetscFunctionReturn(0);
239 }
240 
241 
242 
243