/*
   Support for the parallel AIJ matrix vector multiply
*/
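/*
   Each process of an MPIAIJ matrix stores its block of rows as two sequential
   matrices: A holds the "diagonal" part (the columns this process owns) and B
   holds the "off-diagonal" part (all other columns).  B's column indices are
   compressed to 0..ec-1, and garray[i] records the global column corresponding
   to local column i of B.

   For illustration, suppose two processes each own two rows and two columns of

        1  2  0  3
        0  4  5  0
        6  0  7  8
        0  9  0 10

   Then on rank 0: A = [1 2; 0 4]; B holds the value 3 (global column 3) in row 0
   and the value 5 (global column 2) in row 1; garray = {2,3}; and B stores these
   entries with the compressed column indices 1 and 0, respectively.
*/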
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>    /* needed because it accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscInt           i,j,*aj = B->j,*garray;
  PetscInt           ec = 0; /* Number of nonzero external columns */
  IS                 from,to;
  Vec                gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a hash table to collect the unique off-process columns */
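    /* The table maps (global column id)+1 to (local column id)+1; both are stored
       one-based because PetscTableFind() returns 0 to indicate a missing key. */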
    PetscCall(PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1));
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
        if (!data) {
          /* one-based table */
          PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec,&garray));
    PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
    while (tpos) {
      PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec,garray)); /* sort, then rebuild the table with the sorted local ids */
    PetscCall(PetscTableRemoveAll(gid1_lid1));
    for (i=0; i<ec; i++) {
      PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
    PetscCall(PetscTableDestroy(&gid1_lid1));
#else
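    /* Without PETSC_USE_CTABLE the same map is built with a dense marker array of
       length N (the global number of columns): simpler, but it costs O(N) memory
       per process rather than memory proportional to the number of ghost columns. */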
    /* Make an array as long as the number of global columns and mark those columns that appear in aij->B */
    PetscCall(PetscCalloc1(N,&indices));
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec,&garray));
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
    PetscCall(MatCreateVecs(aij->B,&aij->lvec,NULL));
  }
  PetscCall(VecGetSize(aij->lvec,&ec));
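  /* The scatter built below gathers the ghost values x[garray[i]] of the parallel
     vector x used in MatMult() (global numbering, described by "from") into entry i
     of the sequential work vector lvec (described by "to"). */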

  /* create two temporary index sets used to build the scatter */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to));

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory, so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  aij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
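
/*
   The lvec and Mvctx objects created above are consumed by the parallel
   matrix-vector product (see MatMult_MPIAIJ() in mpiaij.c), which roughly does

      VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
      yy  = aij->A * xx            (local, "diagonal" block)
      VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
      yy += aij->B * aij->lvec     (ghost, "off-diagonal" block)

   so that communication of the ghost values overlaps the local multiply.
*/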

/* Disassemble the off-diagonal portion B of the MPIAIJXxx matrix.
   In other words, change B from the reduced format that uses local column ids to
   the expanded format that uses global column ids, which makes it easier to
   insert new nonzeros (given with global column ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
*/
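/*
   For example, with garray = {2,3}, an entry of B stored with compressed column
   id 1 is re-inserted into the new B with global column id garray[1] = 3, and the
   new B is given A->cmap->N columns.
*/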
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ        *aij  = (Mat_MPIAIJ*)A->data;
  Mat               B     = aij->B,Bnew;
  Mat_SeqAIJ        *Baij = (Mat_SeqAIJ*)B->data;
  PetscInt          i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar       v;
  const PetscScalar *ba;

  PetscFunctionBegin;
  /* free the objects related to the matrix-vector multiply */
  PetscCall(VecGetSize(aij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* create a new B and copy the values over */
  PetscCall(PetscMalloc1(m+1,&nz));
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF,&Bnew));
  PetscCall(MatSetSizes(Bnew,m,n,m,n)); /* Bnew now uses A->cmap->N as its column size */
  PetscCall(MatSetBlockSizesFromMats(Bnew,A,A));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqAIJSetPreallocation(Bnew,0,nz));

  if (Baij->nonew >= 0) { /* Inherit the insertion error options (when nonew is nonnegative). */
    ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  PetscCall(PetscFree(nz));
  PetscCall(MatSeqAIJGetArrayRead(B,&ba));
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = ba[ct++];
      PetscCall(MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode));
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&ba));

  PetscCall(PetscFree(aij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec auglydd          = NULL,auglyoo     = NULL; /* work vectors used to scale the two parts of the local matrix */
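/*
   As set up below, auglyrmapd[j] is the position, in the local ordering defined by
   inA->rmap->mapping, of the j-th locally owned ("diagonal") column, and
   auglyrmapo[j] is the corresponding position for the j-th ghost ("off-diagonal")
   column, i.e. column j of B.  auglydd and auglyoo hold the matching pieces of the
   scale vector passed to MatDiagonalScaleLocal_MPIAIJ().
*/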

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA,&cstart,&cend));
  PetscCall(MatGetSize(ina->A,NULL,&n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheckFalse(nt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n);
  PetscCall(PetscMalloc1(n+1,&auglyrmapd));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd));

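  /* lindices[g] = (local column of global column g in B) + 1 when g is a ghost
     column on this process, and 0 otherwise; i.e. it is the inverse of garray */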
  PetscCall(PetscCalloc1(inA->cmap->N+1,&lindices));
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt+1,&auglyrmapo));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A,scale));
  }

  PetscCall(VecGetArrayRead(scale,&s));

  PetscCall(VecGetLocalSize(auglydd,&n));
  PetscCall(VecGetArray(auglydd,&d));
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  PetscCall(VecRestoreArray(auglydd,&d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A,NULL,auglydd));

  PetscCall(VecGetLocalSize(auglyoo,&n));
  PetscCall(VecGetArray(auglyoo,&o));
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  PetscCall(VecRestoreArrayRead(scale,&s));
  PetscCall(VecRestoreArray(auglyoo,&o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B,NULL,auglyoo));
  PetscFunctionReturn(0);
}