
/*
   Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc-private/isimpl.h>    /* needed because it accesses the data structure of ISLocalToGlobalMapping directly */

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
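/*
   MatSetUpMultiply_MPIAIJ - Builds the data structures needed to apply the parallel
   matrix-vector product: a sorted map (garray) from the compacted local column numbering
   of the off-diagonal block B to global column indices, a sequential work vector (lvec)
   that holds the needed ghost values, and the VecScatter (Mvctx) that fills lvec from the
   distributed input vector during MatMult.
*/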
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  IS             from,to;
  Vec            gvec;
  PetscBool      useblockis;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
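  /*
     At this point garray[] lists, in increasing order, the ec global column indices that
     appear in the off-diagonal block B, and B's column indices have been renumbered into
     the local range 0..ec-1. For example (illustrative values only), if this process needs
     global columns 3, 7, and 12 from other processes, then garray = {3,7,12} and an entry
     that was stored with global column 7 is now stored with local column 1.
  */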
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter-gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
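  /*
     The block form can be used only if the needed ghost columns come in complete, aligned
     blocks of size bs: garray[i*bs] must be a multiple of bs and the following bs-1 entries
     must be consecutive. For example (illustrative values only), with bs = 2 the list
     garray = {4,5,10,11} qualifies, while {4,5,11,12} does not.
  */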
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked IS not consistent across processes (this process is using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(PetscObjectComm((PetscObject)mat),bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);
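  /*
     "from" indexes the ghost entries of the distributed vector by their (blocks of) global
     column indices from garray; "to" is simply 0..ec-1, the positions in the sequential
     work vector lvec that the scatter fills.
  */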

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  aij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */

  ierr = PetscFree(nz);CHKERRQ(ierr);
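  /*
     B stores its column indices in the compacted local numbering 0..ec-1; garray maps them
     back to global columns, so the copy below rebuilds Bnew as an m x N matrix indexed by
     global columns (the "disassembled" form).
  */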
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*      ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec auglydd          = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
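/*
   MatMPIAIJDiagonalScaleLocalSetUp - Builds the static maps used by
   MatDiagonalScaleLocal_MPIAIJ: auglyrmapd[i] gives the position, in the locally numbered
   scale vector, of the i-th column of the diagonal block, and auglyrmapo[i] the position of
   the i-th off-diagonal (ghost) column; auglydd and auglyoo are the matching work vectors.
   The scale argument is not used here; only the matrix's local-to-global mapping is needed.
*/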
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
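
/*
   Minimal usage sketch (illustrative only, not taken from this file): for an assembled
   MPIAIJ matrix A that has a local-to-global row mapping attached, a sequential Vec scale
   whose entries follow A's local (mapped) ordering can be applied to the columns of the
   local blocks with

       ierr = MatMPIAIJDiagonalScaleLocal(A,scale);CHKERRQ(ierr);

   which dispatches to MatDiagonalScaleLocal_MPIAIJ below.
*/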

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
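/*
   MatDiagonalScaleLocal_MPIAIJ - Column scales the diagonal block A and the off-diagonal
   block B by the corresponding pieces of the locally ordered scale vector, using the maps
   built in MatMPIAIJDiagonalScaleLocalSetUp (run on first use).
*/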
PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END