/* petsc/src/mat/impls/aij/mpi/mmaij.c (revision 6024bd2c273c6b4ac03c2bd49d1968cdcd0f9df3) */
#define PETSCMAT_DLL

/*
   Support for the parallel AIJ matrix vector multiply
*/
#include "src/mat/impls/aij/mpi/mpiaij.h"

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
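/*
     Builds the data structures used to apply the off-diagonal block (aij->B) of an
   MPIAIJ matrix in the parallel matrix-vector product:

     garray[]   - sorted global indices of the columns of B that actually contain nonzeros;
                  B's stored column indices are compacted to the local numbering 0 .. ec-1
     aij->lvec  - sequential work vector that receives the needed off-process entries of x
     aij->Mvctx - scatter context that gathers those entries from the distributed vector

     Typically called once, after the matrix has been assembled.
*/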
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one-based table: PetscTableFind() returns 0 for a missing key, so global
           and local ids are stored shifted up by one */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort the global column indices, then rebuild the table with the sorted local ids */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
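  /* the table now maps global column gid+1 to its position i+1 in the sorted garray[];
     the loop below relabels B's column indices with these local positions */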
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first pass we make an array as long as the total number of columns */
  /* and mark those columns that appear in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
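
  /* In either branch garray[0..ec-1] now holds, in increasing order, the global indices
     of the columns of B that are actually used, and B's stored column indices have been
     renumbered to the local range 0..ec-1 */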
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter */
  ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we would have to either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector. */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
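
/*
   In outline, the parallel product y = A x then proceeds as (a sketch only; the actual
   code lives in MatMult_MPIAIJ):

     VecScatterBegin/End(x, aij->lvec, ..., aij->Mvctx)   -- gather the needed off-process entries of x
     y  = aij->A * x_local                                -- diagonal block times the locally owned entries
     y += aij->B * aij->lvec                              -- off-diagonal block times the gathered entries

   garray[] is kept (aij->garray) so that local column i of B can be translated back to
   global column garray[i], as DisAssemble_MPIAIJ() below does.
*/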


#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode DisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0;
  ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0;
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-aij->B->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  aij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
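
/*
   After DisAssemble_MPIAIJ() the off-diagonal block once again uses global column numbering
   (and has the full width n = A->N), so new entries destined for any column can be inserted.
   The scatter context, lvec, colmap and garray have been freed; they are rebuilt as needed
   when the matrix is assembled again.
*/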

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                                     parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;          /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
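/*
     Builds the maps and work vectors used by MatDiagonalScaleLocal_MPIAIJ():

     auglyrmapd[i]   - index in the local (inA->mapping) ordering whose global column is
                       the i-th column of the "diagonal" block ina->A
     auglyrmapo[i]   - index in the local ordering corresponding to the i-th (local) column
                       of the "off-diagonal" block ina->B
     auglydd,auglyoo - sequential work vectors that hold the matching pieces of the scale vector
*/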
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->mapping->n - nt;
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (lindices[inA->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
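/*
     Column scales the two pieces of the local matrix separately: the entries of scale
   selected through auglyrmapd[] scale the "diagonal" block a->A, and the entries selected
   through auglyrmapo[] scale the "off-diagonal" block a->B. The maps and work vectors are
   built lazily, on first use, by MatMPIAIJDiagonalScaleLocalSetUp().
*/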
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END