xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision a213cbb1ef4af2e4bfedadfd705b644759ea7e07)

/*
   Support for the parallel AIJ matrix vector multiply
*/
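
/*
   Orientation note (added summary, not part of the original source comments):
   an MPIAIJ matrix stores a "diagonal" sequential block aij->A (the columns
   owned by this process) and an "off-diagonal" block aij->B whose column
   indices are renumbered to 0..ec-1 through garray[].  The routines in this
   file maintain that compressed numbering and the scatter used to gather the
   needed ghost entries of x, so that conceptually

       y = aij->A * x_owned  +  aij->B * lvec,   lvec = scatter (aij->Mvctx) of
                                                 the ghost entries of x
*/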
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter-gather */
  /* check for the special case where whole blocks are communicated, allowing faster VecScatter operations */
  useblockis = PETSC_FALSE;
  if (mat->rmap->bs > 1) {
    PetscInt bs = mat->rmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
  if (useblockis) {
    PetscInt *ga,bs = mat->rmap->bs,iec = ec/bs;
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc((ec/mat->rmap->bs)*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
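
/*
   Minimal sketch (added for orientation; simplified, see MatMult_MPIAIJ in
   mpiaij.c for the real code) of how the lvec/Mvctx pair built above is used
   by the parallel matrix-vector multiply:

      VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
      (*a->A->ops->mult)(a->A,xx,yy);                   local (diagonal) block
      VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
      (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);        ghost (off-diagonal) block
*/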


#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
     Takes the off-process ("off-diagonal") part B of an already assembled MPIAIJ
   matrix and disassembles it, i.e. restores the global column indices. This is
   done to allow new nonzeros into the matrix that require more communication in
   the matrix-vector multiply; certain data structures must therefore be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
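
/*
   Orientation note (added, not original source): this path is typically reached
   from MatSetValues on an already assembled MPIAIJ matrix when an entry is
   inserted in a ghost column that is not yet in B's compressed column set,
   for example (newGhostCol is a placeholder for a column owned by another rank):

      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatSetValue(A,row,newGhostCol,1.0,ADD_VALUES);  triggers MatDisAssemble_MPIAIJ
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);           rebuilds lvec/Mvctx via MatSetUpMultiply_MPIAIJ
*/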

/*      ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                                     parts of the local matrix */
static Vec auglydd = 0,auglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */

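/*
   Illustration (added note with made-up numbers): if this process owns global
   columns [3,5), the local-to-global mapping is {3,4,7,9} and garray = {7,9},
   then the setup below produces auglyrmapd = {0,1} and auglyrmapo = {2,3};
   MatDiagonalScaleLocal_MPIAIJ then fills auglydd[k] = scale[auglyrmapd[k]]
   and auglyoo[k] = scale[auglyrmapo[k]].
*/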

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
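
/*
   Usage sketch (added note; variable names are illustrative only): the scale
   vector is indexed by the matrix's local (ghosted) ordering, i.e. one entry per
   index of the local-to-global mapping attached to A, covering both owned and
   ghost columns, as implied by the indexing through auglyrmapd/auglyrmapo above:

      Vec lscale;
      VecCreateSeq(PETSC_COMM_SELF,n_local_indices,&lscale);   one entry per local index
      ... fill lscale ...
      MatDiagonalScaleLocal(A,lscale);                          ends up in the routine above
*/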