xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)

/*
   Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter */
  /* check for the special case where whole blocks are communicated, which allows a faster VecScatter */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
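  /* Illustrative example (not taken from a particular run): with bs = 2, garray = {4,5,10,11}
     passes the test above (two aligned, contiguous blocks starting at global columns 4 and 10),
     while garray = {4,5,11,12} fails because 11 is not a multiple of bs. */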
#if defined(PETSC_USE_DEBUG)
  /* check that all processes agree on whether block index sets are used */
  i = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of block index sets is not consistent across processes (this process is using them)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create a temporary global vector used to generate the scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
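
/*
   A minimal sketch (in the spirit of MatMult_MPIAIJ, not code from this file)
   of how the objects built above are consumed by the parallel multiply: the
   scatter gathers the needed off-process entries of x into aij->lvec while the
   diagonal-block product proceeds, then the off-diagonal block adds its part.

     VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
     MatMult(aij->A,xx,yy);                        y  = A_d * x    (owned columns)
     VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
     MatMultAdd(aij->B,aij->lvec,yy,yy);           y += A_o * lvec (ghost columns)
*/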


#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is done to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
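
/*
   A minimal usage sketch (illustrative calls, not code from this file) of the
   situation that leads here: once the matrix has been assembled and its
   off-diagonal block compacted, inserting a value in a column that was
   compacted away requires the off-diagonal block to be expanded back to global
   column indices, which is what MatDisAssemble_MPIAIJ does.

     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);    first assembly compacts aij->B
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatSetValues(A,1,&row,1,&newcol,&v,INSERT_VALUES);   new off-process column
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);    MatDisAssemble_MPIAIJ may be called internally
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   Here row, newcol and v are illustrative names only.
*/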

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0;  /* mapping from the local ordering to the "diagonal"
                                                     and "off-diagonal" parts of the local matrix */
static Vec auglydd = 0,auglyoo = 0;               /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of locally owned entries in the mapping %D does not match the local size of the diagonal block %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of off-diagonal entries found %D exceeds the expected maximum %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
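
/*
   A small illustrative example (assumed layout, not taken from a run) of the
   mappings built above on one process: if the local-to-global row mapping lists
   global indices {3,4,7,9}, the process owns global columns [3,5), and garray
   for the off-diagonal block is {7,9}, then

     auglyrmapd = {0,1}   positions in the scale vector of the owned ("diagonal") entries
     auglyrmapo = {2,3}   positions in the scale vector of the ghost ("off-diagonal") entries

   so entry auglyrmapd[i] of the scale vector scales column i of the diagonal
   block, and entry auglyrmapo[i] scales column i of the off-diagonal block.
*/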

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
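
/*
   A minimal usage sketch (assumed calls, not code from this file): the scale
   vector is a sequential vector following the matrix's local-to-global row
   mapping, i.e. one entry per locally referenced row including ghost rows
   (inA->rmap->mapping->n of them); both local blocks are then column scaled.

     Vec scale;
     VecCreateSeq(PETSC_COMM_SELF,nlocal_with_ghosts,&scale);
     ... fill scale ...
     MatMPIAIJDiagonalScaleLocal(A,scale);

   nlocal_with_ghosts is an illustrative name, not an identifier from this file.
*/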