xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision b9b9770376f6bdad54cc53bb7a8b79bbb53b7e80)
1 /*$Id: mmbaij.c,v 1.33 2000/05/10 16:40:56 bsmith Exp bsmith $*/
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
9 
10 #undef __FUNC__
11 #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ"
12 int MatSetUpMultiply_MPIBAIJ(Mat mat)
13 {
14   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17   int                col,bs = baij->bs,*tmp,*stmp;
18   IS                 from,to;
19   Vec                gvec;
20 #if defined (PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1;
22   PetscTablePosition tpos;
23   int                gid,lid;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams */
30   PetscTableCreate(B->mbs,&gid1_lid1);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42   garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
43   tmp    = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp);
44   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45   while (tpos) {
46     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47     gid--; lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       int gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid --;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs = ec;
66   B->n   = ec*B->bs;
67   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68   /* Mark Adams */
69 #else
70   /* For the first stab we make an array as long as the number of columns */
71   /* mark those columns that are in baij->B */
72   indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices);
73   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74   for (i=0; i<B->mbs; i++) {
75     for (j=0; j<B->ilen[i]; j++) {
76       if (!indices[aj[B->i[i] + j]]) ec++;
77       indices[aj[B->i[i] + j] ] = 1;
78     }
79   }
80 
81   /* form array of columns we need */
82   garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray);
83   tmp    = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp)
84   ec = 0;
85   for (i=0; i<Nbs; i++) {
86     if (indices[i]) {
87       garray[ec++] = i;
88     }
89   }
90 
91   /* make indices now point into garray */
92   for (i=0; i<ec; i++) {
93     indices[garray[i]] = i;
94   }
95 
96   /* compact out the extra columns in B */
97   for (i=0; i<B->mbs; i++) {
98     for (j=0; j<B->ilen[i]; j++) {
99       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
100     }
101   }
102   B->nbs = ec;
103   B->n   = ec*B->bs;
104   ierr = PetscFree(indices);CHKERRQ(ierr);
105 #endif
106 
107   for (i=0,col=0; i<ec; i++) {
108     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
109   }
110   /* create local vector that is used to scatter into */
111   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
112 
113   /* create two temporary index sets for building scatter-gather */
114   for (i=0,col=0; i<ec; i++) {
115     garray[i] = bs*garray[i];
116   }
117   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
118   for (i=0,col=0; i<ec; i++) {
119     garray[i] = garray[i]/bs;
120   }
121 
122   stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp);
123   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
124   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
125   ierr = PetscFree(stmp);CHKERRQ(ierr);
126 
127   /* create temporary global vector to generate scatter context */
128   /* this is inefficient, but otherwise we must do either
129      1) save garray until the first actual scatter when the vector is known or
130      2) have another way of generating a scatter context without a vector.*/
131   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr);
132 
133   /* gnerate the scatter context */
134   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
135 
136   /*
137       Post the receives for the first matrix vector product. We sync-chronize after
138     this on the chance that the user immediately calls MatMult() after assemblying
139     the matrix.
140   */
141   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
142   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
143 
144   PLogObjectParent(mat,baij->Mvctx);
145   PLogObjectParent(mat,baij->lvec);
146   PLogObjectParent(mat,from);
147   PLogObjectParent(mat,to);
148   baij->garray = garray;
149   PLogObjectMemory(mat,(ec+1)*sizeof(int));
150   ierr = ISDestroy(from);CHKERRQ(ierr);
151   ierr = ISDestroy(to);CHKERRQ(ierr);
152   ierr = VecDestroy(gvec);CHKERRQ(ierr);
153   ierr = PetscFree(tmp);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 
158 /*
159      Takes the local part of an already assembled MPIBAIJ matrix
160    and disassembles it. This is to allow new nonzeros into the matrix
161    that require more communication in the matrix vector multiply.
162    Thus certain data-structures must be rebuilt.
163 
164    Kind of slow! But that's what application programmers get when
165    they are sloppy.
166 */
167 #undef __FUNC__
168 #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ"
169 int DisAssemble_MPIBAIJ(Mat A)
170 {
171   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
172   Mat         B = baij->B,Bnew;
173   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
174   int         ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
175   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
176   MatScalar   *a = Bbaij->a;
177 #if defined(PETSC_USE_MAT_SINGLE)
178   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
179   int         l;
180 #else
181   Scalar      *atmp;
182 #endif
183 
184   PetscFunctionBegin;
185   /* free stuff related to matrix-vec multiply */
186   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */
187   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
188   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
189   if (baij->colmap) {
190 #if defined (PETSC_USE_CTABLE)
191     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
192 #else
193     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
194     baij->colmap = 0;
195     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
196 #endif
197   }
198 
199   /* make sure that B is assembled so we can access its values */
200   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
201   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
202 
203   /* invent new B and copy stuff over */
204   nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz);
205   for (i=0; i<mbs; i++) {
206     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
207   }
208   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
209   ierr = PetscFree(nz);CHKERRQ(ierr);
210 
211   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
212   for (i=0; i<mbs; i++) {
213     rvals[0] = bs*i;
214     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
215     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
216       col = garray[Bbaij->j[j]]*bs;
217       for (k=0; k<bs; k++) {
218 #if defined(PETSC_USE_MAT_SINGLE)
219         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
220 #else
221         atmp = a+j*bs2;
222 #endif
223         ierr = MatSetValues_SeqBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
224         col++;
225       }
226     }
227   }
228 #if defined(PETSC_USE_MAT_SINGLE)
229   ierr = PetscFree(atmp);CHKERRQ(ierr);
230 #endif
231   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
232   baij->garray = 0;
233   ierr = PetscFree(rvals);CHKERRQ(ierr);
234   PLogObjectMemory(A,-ec*sizeof(int));
235   ierr = MatDestroy(B);CHKERRQ(ierr);
236   PLogObjectParent(A,Bnew);
237   baij->B = Bnew;
238   A->was_assembled = PETSC_FALSE;
239   PetscFunctionReturn(0);
240 }
241 
242 
243 
244