/* xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision d45987f3cb8da7dca4e7c09f0fedfc3d8f6afad7) */

/*
   Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

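/*
   Builds the structures needed to apply the off-process part of the matrix
   in the matrix-vector product: the map garray from the compacted local
   column numbering of aij->B back to global columns, the sequential work
   vector aij->lvec that holds the needed ghost values, and the scatter
   aij->Mvctx that gathers those values from the distributed input vector.
*/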
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
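  /* ec now counts the distinct global columns of B that contain a nonzero: these are this process's ghost columns */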
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
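  /* aj now holds local column indices in 0..ec-1, ordered consistently with the sorted garray */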
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter-gather */
  /* check for the special case where whole blocks are communicated, allowing faster VecScatter operations */
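  /* The blocked path is usable only when garray consists of aligned, contiguous
     runs of length bs: the first global column of each run must be divisible by
     bs and the following bs-1 columns must be consecutive. For example, with
     bs=2, garray = {4,5,10,11} qualifies while garray = {4,5,9,10} does not
     (9 is not a multiple of 2). */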
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of block index sets not consistent across processes (this process is using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec % bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
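
/*
   A sketch of how the objects built above are used during the matrix-vector
   product (the actual call site is MatMult_MPIAIJ() in mpiaij.c, so the
   details there may differ):

     ierr = VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->A->ops->mult)(aij->A,xx,yy);CHKERRQ(ierr);               <- diagonal block times owned entries of xx
     ierr = VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);CHKERRQ(ierr);  <- off-diagonal block times gathered ghost values
*/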

#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
     Takes the off-diagonal (B) part of an already assembled MPIAIJ matrix
   and disassembles it, mapping its compacted local column indices back to
   global column indices. This is done to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
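/*
   A sketch of what triggers this routine (names here are illustrative): after
   MatAssemblyEnd(), an insertion into a column that B has never seen, e.g.

     ierr = MatSetValues(A,1,&row,1,&newghostcol,&v,ADD_VALUES);CHKERRQ(ierr);

   cannot be placed in the compacted B, so the matrix is first disassembled
   here and then reassembled with the enlarged ghost column set.
*/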
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */
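/* Here "diagonal" refers to entries whose global column lies in this process's
   ownership range (stored in the A block) and "off-diagonal" to the remaining
   ghost columns (stored in the B block); the maps recover, for each position
   in those blocks, the corresponding position in the local ordering defined by
   the matrix's local-to-global mapping. */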

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
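
/*
   A sketch of the intended usage (assumes the matrix was given a
   local-to-global mapping, e.g. with MatSetLocalToGlobalMapping(), since the
   setup routine above reads A->rmap->mapping; the vector name is illustrative):

     Vec localscale;   <- sequential, of length A->rmap->mapping->n, in the local ordering
     ierr = MatMPIAIJDiagonalScaleLocal(A,localscale);CHKERRQ(ierr);

   This column-scales the diagonal (A) and off-diagonal (B) blocks by the
   matching pieces of the locally ordered vector.
*/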

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END