xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 
2 /*
3    Support for the parallel AIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/aij/mpi/mpiaij.h>
6 #include <petsc/private/vecimpl.h>
7 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
8 
9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
12   Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
13   PetscInt           i,j,*aj = B->j,*garray;
14   PetscInt           ec = 0; /* Number of nonzero external columns */
15   IS                 from,to;
16   Vec                gvec;
17 #if defined(PETSC_USE_CTABLE)
18   PetscTable         gid1_lid1;
19   PetscTablePosition tpos;
20   PetscInt           gid,lid;
21 #else
22   PetscInt           N = mat->cmap->N,*indices;
23 #endif
24 
25   PetscFunctionBegin;
26   if (!aij->garray) {
27     PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
28 #if defined(PETSC_USE_CTABLE)
29     /* use a table */
30     CHKERRQ(PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1));
31     for (i=0; i<aij->B->rmap->n; i++) {
32       for (j=0; j<B->ilen[i]; j++) {
33         PetscInt data,gid1 = aj[B->i[i] + j] + 1;
34         CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data));
35         if (!data) {
36           /* one based table */
37           CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
38         }
39       }
40     }
41     /* form array of columns we need */
42     CHKERRQ(PetscMalloc1(ec,&garray));
43     CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos));
44     while (tpos) {
45       CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
46       gid--;
47       lid--;
48       garray[lid] = gid;
49     }
50     CHKERRQ(PetscSortInt(ec,garray)); /* sort, and rebuild */
51     CHKERRQ(PetscTableRemoveAll(gid1_lid1));
52     for (i=0; i<ec; i++) {
53       CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
54     }
55     /* compact out the extra columns in B */
56     for (i=0; i<aij->B->rmap->n; i++) {
57       for (j=0; j<B->ilen[i]; j++) {
58         PetscInt gid1 = aj[B->i[i] + j] + 1;
59         CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid));
60         lid--;
61         aj[B->i[i] + j] = lid;
62       }
63     }
64     CHKERRQ(PetscLayoutDestroy(&aij->B->cmap));
65     CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
66     CHKERRQ(PetscTableDestroy(&gid1_lid1));
67 #else
68     /* Make an array as long as the number of columns */
69     /* mark those columns that are in aij->B */
70     CHKERRQ(PetscCalloc1(N,&indices));
71     for (i=0; i<aij->B->rmap->n; i++) {
72       for (j=0; j<B->ilen[i]; j++) {
73         if (!indices[aj[B->i[i] + j]]) ec++;
74         indices[aj[B->i[i] + j]] = 1;
75       }
76     }
77 
78     /* form array of columns we need */
79     CHKERRQ(PetscMalloc1(ec,&garray));
80     ec   = 0;
81     for (i=0; i<N; i++) {
82       if (indices[i]) garray[ec++] = i;
83     }
84 
85     /* make indices now point into garray */
86     for (i=0; i<ec; i++) {
87       indices[garray[i]] = i;
88     }
89 
90     /* compact out the extra columns in B */
91     for (i=0; i<aij->B->rmap->n; i++) {
92       for (j=0; j<B->ilen[i]; j++) {
93         aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
94       }
95     }
96     CHKERRQ(PetscLayoutDestroy(&aij->B->cmap));
97     CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
98     CHKERRQ(PetscFree(indices));
99 #endif
100   } else {
101     garray = aij->garray;
102   }
103 
104   if (!aij->lvec) {
105     PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
106     CHKERRQ(MatCreateVecs(aij->B,&aij->lvec,NULL));
107   }
108   CHKERRQ(VecGetSize(aij->lvec,&ec));
109 
110   /* create two temporary Index sets for build scatter gather */
111   CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from));
112   CHKERRQ(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to));
113 
114   /* create temporary global vector to generate scatter context */
115   /* This does not allocate the array's memory so is efficient */
116   CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));
117 
118   /* generate the scatter context */
119   CHKERRQ(VecScatterDestroy(&aij->Mvctx));
120   CHKERRQ(VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx));
121   CHKERRQ(VecScatterViewFromOptions(aij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
122   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx));
123   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec));
124   CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
125   aij->garray = garray;
126 
127   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
128   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));
129 
130   CHKERRQ(ISDestroy(&from));
131   CHKERRQ(ISDestroy(&to));
132   CHKERRQ(VecDestroy(&gvec));
133   PetscFunctionReturn(0);
134 }
135 
136 /* Disassemble the off-diag portion of the MPIAIJXxx matrix.
137    In other words, change the B from reduced format using local col ids
138    to expanded format using global col ids, which will make it easier to
139    insert new nonzeros (with global col ids) into the matrix.
140    The off-diag B determines communication in the matrix vector multiply.
141 */
142 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
143 {
144   Mat_MPIAIJ        *aij  = (Mat_MPIAIJ*)A->data;
145   Mat               B     = aij->B,Bnew;
146   Mat_SeqAIJ        *Baij = (Mat_SeqAIJ*)B->data;
147   PetscInt          i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
148   PetscScalar       v;
149   const PetscScalar *ba;
150 
151   PetscFunctionBegin;
152   /* free stuff related to matrix-vec multiply */
153   CHKERRQ(VecGetSize(aij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
154   CHKERRQ(VecDestroy(&aij->lvec));
155   if (aij->colmap) {
156 #if defined(PETSC_USE_CTABLE)
157     CHKERRQ(PetscTableDestroy(&aij->colmap));
158 #else
159     CHKERRQ(PetscFree(aij->colmap));
160     CHKERRQ(PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt)));
161 #endif
162   }
163 
164   /* make sure that B is assembled so we can access its values */
165   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
166   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
167 
168   /* invent new B and copy stuff over */
169   CHKERRQ(PetscMalloc1(m+1,&nz));
170   for (i=0; i<m; i++) {
171     nz[i] = Baij->i[i+1] - Baij->i[i];
172   }
173   CHKERRQ(MatCreate(PETSC_COMM_SELF,&Bnew));
174   CHKERRQ(MatSetSizes(Bnew,m,n,m,n)); /* Bnew now uses A->cmap->N as its col size */
175   CHKERRQ(MatSetBlockSizesFromMats(Bnew,A,A));
176   CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name));
177   CHKERRQ(MatSeqAIJSetPreallocation(Bnew,0,nz));
178 
179   if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
180     ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
181   }
182 
183   /*
184    Ensure that B's nonzerostate is monotonically increasing.
185    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
186    */
187   Bnew->nonzerostate = B->nonzerostate;
188 
189   CHKERRQ(PetscFree(nz));
190   CHKERRQ(MatSeqAIJGetArrayRead(B,&ba));
191   for (i=0; i<m; i++) {
192     for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
193       col  = garray[Baij->j[ct]];
194       v    = ba[ct++];
195       CHKERRQ(MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode));
196     }
197   }
198   CHKERRQ(MatSeqAIJRestoreArrayRead(B,&ba));
199 
200   CHKERRQ(PetscFree(aij->garray));
201   CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
202   CHKERRQ(MatDestroy(&B));
203   CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));
204 
205   aij->B           = Bnew;
206   A->was_assembled = PETSC_FALSE;
207   PetscFunctionReturn(0);
208 }
209 
210 /*      ugly stuff added for Glenn someday we should fix this up */
211 
212 static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
213 static Vec auglydd          = NULL,auglyoo     = NULL; /* work vectors used to scale the two parts of the local matrix */
214 
215 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
216 {
217   Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */
218   PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
219   PetscInt       *r_rmapd,*r_rmapo;
220 
221   PetscFunctionBegin;
222   CHKERRQ(MatGetOwnershipRange(inA,&cstart,&cend));
223   CHKERRQ(MatGetSize(ina->A,NULL,&n));
224   CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
225   nt   = 0;
226   for (i=0; i<inA->rmap->mapping->n; i++) {
227     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
228       nt++;
229       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
230     }
231   }
232   PetscCheckFalse(nt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n);
233   CHKERRQ(PetscMalloc1(n+1,&auglyrmapd));
234   for (i=0; i<inA->rmap->mapping->n; i++) {
235     if (r_rmapd[i]) {
236       auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
237     }
238   }
239   CHKERRQ(PetscFree(r_rmapd));
240   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd));
241 
242   CHKERRQ(PetscCalloc1(inA->cmap->N+1,&lindices));
243   for (i=0; i<ina->B->cmap->n; i++) {
244     lindices[garray[i]] = i+1;
245   }
246   no   = inA->rmap->mapping->n - nt;
247   CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
248   nt   = 0;
249   for (i=0; i<inA->rmap->mapping->n; i++) {
250     if (lindices[inA->rmap->mapping->indices[i]]) {
251       nt++;
252       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
253     }
254   }
255   PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,n);
256   CHKERRQ(PetscFree(lindices));
257   CHKERRQ(PetscMalloc1(nt+1,&auglyrmapo));
258   for (i=0; i<inA->rmap->mapping->n; i++) {
259     if (r_rmapo[i]) {
260       auglyrmapo[(r_rmapo[i]-1)] = i;
261     }
262   }
263   CHKERRQ(PetscFree(r_rmapo));
264   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo));
265   PetscFunctionReturn(0);
266 }
267 
268 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
269 {
270   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
271 
272   PetscFunctionBegin;
273   CHKERRQ(PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale)));
274   PetscFunctionReturn(0);
275 }
276 
277 PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
278 {
279   Mat_MPIAIJ        *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */
280   PetscInt          n,i;
281   PetscScalar       *d,*o;
282   const PetscScalar *s;
283 
284   PetscFunctionBegin;
285   if (!auglyrmapd) {
286     CHKERRQ(MatMPIAIJDiagonalScaleLocalSetUp(A,scale));
287   }
288 
289   CHKERRQ(VecGetArrayRead(scale,&s));
290 
291   CHKERRQ(VecGetLocalSize(auglydd,&n));
292   CHKERRQ(VecGetArray(auglydd,&d));
293   for (i=0; i<n; i++) {
294     d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
295   }
296   CHKERRQ(VecRestoreArray(auglydd,&d));
297   /* column scale "diagonal" portion of local matrix */
298   CHKERRQ(MatDiagonalScale(a->A,NULL,auglydd));
299 
300   CHKERRQ(VecGetLocalSize(auglyoo,&n));
301   CHKERRQ(VecGetArray(auglyoo,&o));
302   for (i=0; i<n; i++) {
303     o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
304   }
305   CHKERRQ(VecRestoreArrayRead(scale,&s));
306   CHKERRQ(VecRestoreArray(auglyoo,&o));
307   /* column scale "off-diagonal" portion of local matrix */
308   CHKERRQ(MatDiagonalScale(a->B,NULL,auglyoo));
309   PetscFunctionReturn(0);
310 }
311