xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 323b833ffd2abbc021b1f705dc98117c2173689c)
1 /*$Id: mmsbaij.c,v 1.4 2001/01/15 21:46:06 bsmith Exp balay $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
9 
10 #undef __FUNC__
11 #define __FUNC__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17   int                col,bs = baij->bs,*tmp,*stmp;
18   IS                 from,to;
19   Vec                gvec;
20 #if defined (PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1;
22   PetscTablePosition tpos;
23   int                gid,lid;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams */
30   PetscTableCreate(B->mbs,&gid1_lid1);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43   ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr);
44   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45   while (tpos) {
46     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47     gid--; lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       int gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid --;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs     = ec;
66   baij->B->n = ec*B->bs;
67   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68   /* Mark Adams */
69 #else
70   /* For the first stab we make an array as long as the number of columns */
71   /* mark those columns that are in baij->B */
72   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
73   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74   for (i=0; i<B->mbs; i++) {
75     for (j=0; j<B->ilen[i]; j++) {
76       if (!indices[aj[B->i[i] + j]]) ec++;
77       indices[aj[B->i[i] + j] ] = 1;
78     }
79   }
80 
81   /* form array of columns we need */
82   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
83   ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr);
84 
85   /* make indices now point into garray */
86   for (i=0; i<ec; i++) {
87     indices[garray[i]] = i;
88   }
89 
90   /* compact out the extra columns in B */
91   for (i=0; i<B->mbs; i++) {
92     for (j=0; j<B->ilen[i]; j++) {
93       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
94     }
95   }
96   B->nbs       = ec;
97   baij->B->n   = ec*B->bs;
98   ierr = PetscFree(indices);CHKERRQ(ierr);
99 #endif
100 
101   for (i=0,col=0; i<ec; i++) {
102     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
103   }
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather */
108 
109   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
110   for (i=0,col=0; i<ec; i++) {
111     garray[i] = bs*garray[i];
112   }
113   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
114   for (i=0,col=0; i<ec; i++) {
115     garray[i] = garray[i]/bs;
116   }
117 
118   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
119   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
120   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
121   ierr = PetscFree(stmp);CHKERRQ(ierr);
122 
123   /* create temporary global vector to generate scatter context */
124   /* this is inefficient, but otherwise we must do either
125      1) save garray until the first actual scatter when the vector is known or
126      2) have another way of generating a scatter context without a vector.*/
127   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
128 
129   /* gnerate the scatter context */
130   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
131 
132   /*
133       Post the receives for the first matrix vector product. We sync-chronize after
134     this on the chance that the user immediately calls MatMult() after assemblying
135     the matrix.
136   */
137   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
138   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
139 
140   PetscLogObjectParent(mat,baij->Mvctx);
141   PetscLogObjectParent(mat,baij->lvec);
142   PetscLogObjectParent(mat,from);
143   PetscLogObjectParent(mat,to);
144   baij->garray = garray;
145   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
146   ierr = ISDestroy(from);CHKERRQ(ierr);
147   ierr = ISDestroy(to);CHKERRQ(ierr);
148   ierr = VecDestroy(gvec);CHKERRQ(ierr);
149   ierr = PetscFree(tmp);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 
154 /*
155      Takes the local part of an already assembled MPIBAIJ matrix
156    and disassembles it. This is to allow new nonzeros into the matrix
157    that require more communication in the matrix vector multiply.
158    Thus certain data-structures must be rebuilt.
159 
160    Kind of slow! But that's what application programmers get when
161    they are sloppy.
162 */
163 #undef __FUNC__
164 #define __FUNC__ "DisAssemble_MPISBAIJ"
165 int DisAssemble_MPISBAIJ(Mat A)
166 {
167   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
168   Mat         B = baij->B,Bnew;
169   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
170   int         ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
171   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
172   MatScalar   *a = Bbaij->a;
173   Scalar      *atmp;
174 #if defined(PETSC_USE_MAT_SINGLE)
175   int         l;
176 #endif
177 
178   PetscFunctionBegin;
179 #if defined(PETSC_USE_MAT_SINGLE)
180   ierr = PetscMalloc(baij->bs*sizeof(Scalar),&atmp);
181 #endif
182   /* free stuff related to matrix-vec multiply */
183   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
184   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
185   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
186   if (baij->colmap) {
187 #if defined (PETSC_USE_CTABLE)
188     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
189 #else
190     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
191     baij->colmap = 0;
192     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
193 #endif
194   }
195 
196   /* make sure that B is assembled so we can access its values */
197   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
198   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
199 
200   /* invent new B and copy stuff over */
201   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
202   for (i=0; i<mbs; i++) {
203     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
204   }
205   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
206   ierr = PetscFree(nz);CHKERRQ(ierr);
207 
208   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
209   for (i=0; i<mbs; i++) {
210     rvals[0] = bs*i;
211     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
212     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
213       col = garray[Bbaij->j[j]]*bs;
214       for (k=0; k<bs; k++) {
215 #if defined(PETSC_USE_MAT_SINGLE)
216         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
217 #else
218         atmp = a+j*bs2;
219 #endif
220         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
221         col++;
222       }
223     }
224   }
225 #if defined(PETSC_USE_MAT_SINGLE)
226   ierr = PetscFree(atmp);CHKERRQ(ierr);
227 #endif
228   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
229   baij->garray = 0;
230   ierr = PetscFree(rvals);CHKERRQ(ierr);
231   PetscLogObjectMemory(A,-ec*sizeof(int));
232   ierr = MatDestroy(B);CHKERRQ(ierr);
233   PetscLogObjectParent(A,Bnew);
234   baij->B = Bnew;
235   A->was_assembled = PETSC_FALSE;
236   PetscFunctionReturn(0);
237 }
238 
239 
240 
241