xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1 /*$Id: mmsbaij.c,v 1.3 2000/10/24 20:26:02 bsmith Exp bsmith $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
9 
10 #undef __FUNC__
11 #define __FUNC__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17   int                col,bs = baij->bs,*tmp,*stmp;
18   IS                 from,to;
19   Vec                gvec;
20 #if defined (PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1;
22   PetscTablePosition tpos;
23   int                gid,lid;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams */
30   PetscTableCreate(B->mbs,&gid1_lid1);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42 ierr = PetscMalloc((ec+1)*sizeof(int),&  garray );CHKERRQ(ierr);
43 ierr = PetscMalloc((ec*bs+1)*sizeof(int),&  tmp    );CHKERRQ(ierr);
44   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45   while (tpos) {
46     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47     gid--; lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       int gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid --;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs     = ec;
66   baij->B->n = ec*B->bs;
67   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68   /* Mark Adams */
69 #else
70   /* For the first stab we make an array as long as the number of columns */
71   /* mark those columns that are in baij->B */
72 ierr = PetscMalloc((Nbs+1)*sizeof(int),&  indices );CHKERRQ(ierr);
73   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74   for (i=0; i<B->mbs; i++) {
75     for (j=0; j<B->ilen[i]; j++) {
76       if (!indices[aj[B->i[i] + j]]) ec++;
77       indices[aj[B->i[i] + j] ] = 1;
78     }
79   }
80 
81   /* form array of columns we need */
82 ierr = PetscMalloc((ec+1)*sizeof(int),&  garray );CHKERRQ(ierr);
83 ierr = PetscMalloc((ec*bs+1)*sizeof(int),&  tmp    );CHKERRQ(ierr);
84 
85   /* make indices now point into garray */
86   for (i=0; i<ec; i++) {
87     indices[garray[i]] = i;
88   }
89 
90   /* compact out the extra columns in B */
91   for (i=0; i<B->mbs; i++) {
92     for (j=0; j<B->ilen[i]; j++) {
93       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
94     }
95   }
96   B->nbs       = ec;
97   baij->B->n   = ec*B->bs;
98   ierr = PetscFree(indices);CHKERRQ(ierr);
99 #endif
100 
101   for (i=0,col=0; i<ec; i++) {
102     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
103   }
104   /* create local vector that is used to scatter into */
105   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
106 
107   /* create two temporary index sets for building scatter-gather */
108 
109   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
110   for (i=0,col=0; i<ec; i++) {
111     garray[i] = bs*garray[i];
112   }
113   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
114   for (i=0,col=0; i<ec; i++) {
115     garray[i] = garray[i]/bs;
116   }
117 
118 ierr = PetscMalloc((ec+1)*sizeof(int),&  stmp );CHKERRQ(ierr);
119   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
120   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
121   ierr = PetscFree(stmp);CHKERRQ(ierr);
122 
123   /* create temporary global vector to generate scatter context */
124   /* this is inefficient, but otherwise we must do either
125      1) save garray until the first actual scatter when the vector is known or
126      2) have another way of generating a scatter context without a vector.*/
127   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
128 
129   /* gnerate the scatter context */
130   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
131 
132   /*
133       Post the receives for the first matrix vector product. We sync-chronize after
134     this on the chance that the user immediately calls MatMult() after assemblying
135     the matrix.
136   */
137   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
138   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
139 
140   PetscLogObjectParent(mat,baij->Mvctx);
141   PetscLogObjectParent(mat,baij->lvec);
142   PetscLogObjectParent(mat,from);
143   PetscLogObjectParent(mat,to);
144   baij->garray = garray;
145   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
146   ierr = ISDestroy(from);CHKERRQ(ierr);
147   ierr = ISDestroy(to);CHKERRQ(ierr);
148   ierr = VecDestroy(gvec);CHKERRQ(ierr);
149   ierr = PetscFree(tmp);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 
154 /*
155      Takes the local part of an already assembled MPIBAIJ matrix
156    and disassembles it. This is to allow new nonzeros into the matrix
157    that require more communication in the matrix vector multiply.
158    Thus certain data-structures must be rebuilt.
159 
160    Kind of slow! But that's what application programmers get when
161    they are sloppy.
162 */
163 #undef __FUNC__
164 #define __FUNC__ "DisAssemble_MPISBAIJ"
165 int DisAssemble_MPISBAIJ(Mat A)
166 {
167   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
168   Mat         B = baij->B,Bnew;
169   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
170   int         ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
171   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
172   MatScalar   *a = Bbaij->a;
173 #if defined(PETSC_USE_MAT_SINGLE)
174   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
175   int         l;
176 #else
177   Scalar      *atmp;
178 #endif
179 
180   PetscFunctionBegin;
181   /* free stuff related to matrix-vec multiply */
182   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
183   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
184   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
185   if (baij->colmap) {
186 #if defined (PETSC_USE_CTABLE)
187     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
188 #else
189     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
190     baij->colmap = 0;
191     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
192 #endif
193   }
194 
195   /* make sure that B is assembled so we can access its values */
196   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
198 
199   /* invent new B and copy stuff over */
200 ierr = PetscMalloc(mbs*sizeof(int),&(  nz ));CHKERRQ(ierr);
201   for (i=0; i<mbs; i++) {
202     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
203   }
204   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
205   ierr = PetscFree(nz);CHKERRQ(ierr);
206 
207 ierr = PetscMalloc(bs*sizeof(int),&(  rvals ));CHKERRQ(ierr);
208   for (i=0; i<mbs; i++) {
209     rvals[0] = bs*i;
210     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
211     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
212       col = garray[Bbaij->j[j]]*bs;
213       for (k=0; k<bs; k++) {
214 #if defined(PETSC_USE_MAT_SINGLE)
215         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
216 #else
217         atmp = a+j*bs2;
218 #endif
219         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
220         col++;
221       }
222     }
223   }
224 #if defined(PETSC_USE_MAT_SINGLE)
225   ierr = PetscFree(atmp);CHKERRQ(ierr);
226 #endif
227   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
228   baij->garray = 0;
229   ierr = PetscFree(rvals);CHKERRQ(ierr);
230   PetscLogObjectMemory(A,-ec*sizeof(int));
231   ierr = MatDestroy(B);CHKERRQ(ierr);
232   PetscLogObjectParent(A,Bnew);
233   baij->B = Bnew;
234   A->was_assembled = PETSC_FALSE;
235   PetscFunctionReturn(0);
236 }
237 
238 
239 
240