#define PETSCMAT_DLL

/*
   Support for the parallel AIJ matrix vector multiply
*/
#include "src/mat/impls/aij/mpi/mpiaij.h"

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscTruth         useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(aij->B->m,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->m; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->n = aij->B->N = ec;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
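  /*
     Example of the compaction performed by both branches above: if the
     off-diagonal block B on this process references global columns 3, 7,
     and 9, then garray = {3,7,9} and B's column indices are renumbered so
     those columns become local columns 0, 1, and 2; B is then a compact
     m-by-3 sequential matrix and garray records the local-to-global
     column translation.
  */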
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter-gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_TRUE;
  if (mat->bs > 1) {
    PetscInt bs = mat->bs,ibs,ga;
    if (ec % bs) {
      useblockis = PETSC_FALSE; /* ghost columns are not a multiple of the block size */
    } else {
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
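  /*
     Example: with bs = 2 the block path below is taken for
     garray = {4,5,10,11} (two contiguous, block-aligned pairs), while
     garray = {4,5,9,10} falls back to a general index set because the
     second pair {9,10} does not start on a block boundary.
  */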
  if (useblockis) {
    PetscInt *ga,bs = mat->bs,iec = ec/bs;
    ierr = PetscLogInfo((mat,"MatSetUpMultiply_MPIAIJ: Using block index set to define scatter\n"));CHKERRQ(ierr);
    ierr = PetscMalloc((iec+1)*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs];
    ierr = ISCreateBlock(mat->comm,bs,iec,ga,&from);CHKERRQ(ierr);
    ierr = PetscFree(ga);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(mat->comm,ec,garray,&from);CHKERRQ(ierr);
  }
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This is inefficient, but otherwise we must either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector. */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
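
/*
   A sketch of how the scatter built above is typically consumed during the
   parallel matrix-vector product (assuming the vectors-first calling
   sequence of VecScatterBegin()/VecScatterEnd() in this PETSc generation):

     ierr = VecScatterBegin(xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
     ierr = (*aij->A->ops->mult)(aij->A,xx,yy);CHKERRQ(ierr);              local "diagonal" product, overlapped with communication
     ierr = VecScatterEnd(xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
     ierr = (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);CHKERRQ(ierr); add "off-diagonal" product using the gathered ghost values
*/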


#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIAIJ"
/*
     Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode DisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->m,n = A->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(aij->lvec);CHKERRQ(ierr); aij->lvec = 0;
  ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr); aij->Mvctx = 0;
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    aij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-aij->B->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  aij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
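
/*
   Illustrative application pattern (a sketch, not library code) that reaches
   the disassembly path above: insert a nonzero into a previously empty
   off-process column after the matrix has already been assembled, e.g.

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&newcol,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where row, newcol, and v are the caller's values and newcol is an
   off-process column absent from the original nonzero pattern; the
   off-diagonal block and its scatter must then be rebuilt with global
   column numbering.
*/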

/*      ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0;  /* mapping from the local ordering to the "diagonal"
                                                     and "off-diagonal" parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;          /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (inA->mapping->indices[i] >= cstart && inA->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->mapping->n - nt;
  ierr = PetscMalloc((inA->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->mapping->n; i++) {
    if (lindices[inA->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[r_rmapo[i]-1] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
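
/*
   Semantics of the maps built above: auglyrmapd[k] is the position in the
   local (mapped) ordering of the locally owned degree of freedom with
   global index cstart+k, and auglyrmapo[k] is the position in the local
   ordering of the k-th ghost degree of freedom, where k is its slot in
   garray.
*/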

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
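
/*
   The query above resolves only if an implementation has composed a function
   under the name "MatDiagonalScaleLocal_C" on the matrix object; a sketch of
   how such a registration typically looks in this PETSc generation (it would
   live in the MPIAIJ constructor, not here):

     ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatDiagonalScaleLocal_C",
                                              "MatDiagonalScaleLocal_MPIAIJ",
                                              MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
*/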

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END