xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 86e156311f32add12eaf87ef3ef1fd0df086776c)
1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2 
3 /*
4    Support for the parallel SBAIJ matrix vector multiply
5 */
6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7 #include "src/vec/vecimpl.h"
8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12 int MatSetUpMultiply_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
15   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17   int                bs = baij->bs,*stmp;
18   IS                 from,to;
19   Vec                gvec;
20 #if defined (PETSC_USE_CTABLE)
21   PetscTable         gid1_lid1;
22   PetscTablePosition tpos;
23   int                gid,lid;
24 #endif
25 
26   PetscFunctionBegin;
27 
28 #if defined (PETSC_USE_CTABLE)
29   /* use a table - Mark Adams */
30   PetscTableCreate(B->mbs,&gid1_lid1);
31   for (i=0; i<B->mbs; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       int data,gid1 = aj[B->i[i]+j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43   /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */
44   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45   while (tpos) {
46     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47     gid--; lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53   for (i=0; i<ec; i++) {
54     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55   }
56   /* compact out the extra columns in B */
57   for (i=0; i<B->mbs; i++) {
58     for (j=0; j<B->ilen[i]; j++) {
59       int gid1 = aj[B->i[i] + j] + 1;
60       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61       lid --;
62       aj[B->i[i]+j] = lid;
63     }
64   }
65   B->nbs     = ec;
66   baij->B->n = ec*B->bs;
67   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68   /* Mark Adams */
69 #else
70   /* For the first stab we make an array as long as the number of columns */
71   /* mark those columns that are in baij->B */
72   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
73   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74   for (i=0; i<B->mbs; i++) {
75     for (j=0; j<B->ilen[i]; j++) {
76       if (!indices[aj[B->i[i] + j]]) ec++;
77       indices[aj[B->i[i] + j] ] = 1;
78     }
79   }
80 
81   /* form array of columns we need */
82   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
83   /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */
84   ec = 0;
85   for (i=0; i<Nbs; i++) {
86     if (indices[i]) {
87       garray[ec++] = i;
88     }
89   }
90 
91   /* make indices now point into garray */
92   for (i=0; i<ec; i++) {
93     indices[garray[i]] = i;
94   }
95 
96   /* compact out the extra columns in B */
97   for (i=0; i<B->mbs; i++) {
98     for (j=0; j<B->ilen[i]; j++) {
99       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
100     }
101   }
102   B->nbs       = ec;
103   baij->B->n   = ec*B->bs;
104   ierr = PetscFree(indices);CHKERRQ(ierr);
105 #endif
106   /*
107   for (i=0,col=0; i<ec; i++) {
108     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
109   }
110   */
111   /* create local vector that is used to scatter into */
112   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
113 
114   /* create two temporary index sets for building scatter-gather */
115 
116   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
117   for (i=0; i<ec; i++) {
118     garray[i] = bs*garray[i];
119   }
120   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
121   for (i=0; i<ec; i++) {
122     garray[i] = garray[i]/bs;
123   }
124 
125   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
126   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
127   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
128   ierr = PetscFree(stmp);CHKERRQ(ierr);
129 
130   /* create temporary global vector to generate scatter context */
131   /* this is inefficient, but otherwise we must do either
132      1) save garray until the first actual scatter when the vector is known or
133      2) have another way of generating a scatter context without a vector.*/
134   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
135 
136   /* gnerate the scatter context */
137   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
138 
139   /*
140       Post the receives for the first matrix vector product. We sync-chronize after
141     this on the chance that the user immediately calls MatMult() after assemblying
142     the matrix.
143   */
144   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
145   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
146 
147   PetscLogObjectParent(mat,baij->Mvctx);
148   PetscLogObjectParent(mat,baij->lvec);
149   PetscLogObjectParent(mat,from);
150   PetscLogObjectParent(mat,to);
151   baij->garray = garray;
152   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
153   ierr = ISDestroy(from);CHKERRQ(ierr);
154   ierr = ISDestroy(to);CHKERRQ(ierr);
155   ierr = VecDestroy(gvec);CHKERRQ(ierr);
156   /* ierr = PetscFree(tmp);CHKERRQ(ierr); */
157   PetscFunctionReturn(0);
158 }
159 
160 
161 /*
162      Takes the local part of an already assembled MPIBAIJ matrix
163    and disassembles it. This is to allow new nonzeros into the matrix
164    that require more communication in the matrix vector multiply.
165    Thus certain data-structures must be rebuilt.
166 
167    Kind of slow! But that's what application programmers get when
168    they are sloppy.
169 */
170 #undef __FUNCT__
171 #define __FUNCT__ "DisAssemble_MPISBAIJ"
172 int DisAssemble_MPISBAIJ(Mat A)
173 {
174   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
175   Mat           B = baij->B,Bnew;
176   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
177   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
178   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
179   MatScalar     *a = Bbaij->a;
180   PetscScalar   *atmp;
181 #if defined(PETSC_USE_MAT_SINGLE)
182   int           l;
183 #endif
184 
185   PetscFunctionBegin;
186 #if defined(PETSC_USE_MAT_SINGLE)
187   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
188 #endif
189   /* free stuff related to matrix-vec multiply */
190   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
191   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
192   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
193   if (baij->colmap) {
194 #if defined (PETSC_USE_CTABLE)
195     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
196 #else
197     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
198     baij->colmap = 0;
199     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
200 #endif
201   }
202 
203   /* make sure that B is assembled so we can access its values */
204   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
206 
207   /* invent new B and copy stuff over */
208   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
209   for (i=0; i<mbs; i++) {
210     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
211   }
212   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
213   ierr = PetscFree(nz);CHKERRQ(ierr);
214 
215   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
216   for (i=0; i<mbs; i++) {
217     rvals[0] = bs*i;
218     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
219     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
220       col = garray[Bbaij->j[j]]*bs;
221       for (k=0; k<bs; k++) {
222 #if defined(PETSC_USE_MAT_SINGLE)
223         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
224 #else
225         atmp = a+j*bs2 + k*bs;
226 #endif
227         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
228         col++;
229       }
230     }
231   }
232 #if defined(PETSC_USE_MAT_SINGLE)
233   ierr = PetscFree(atmp);CHKERRQ(ierr);
234 #endif
235   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
236   baij->garray = 0;
237   ierr = PetscFree(rvals);CHKERRQ(ierr);
238   PetscLogObjectMemory(A,-ec*sizeof(int));
239   ierr = MatDestroy(B);CHKERRQ(ierr);
240   PetscLogObjectParent(A,Bnew);
241   baij->B = Bnew;
242   A->was_assembled = PETSC_FALSE;
243   PetscFunctionReturn(0);
244 }
245 
246 
247 
248