xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision a873a097637035d8db80cd75424b3cabe57921c9)
1 
2 /*
3    Support for the parallel AIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/aij/mpi/mpiaij.h>
6 #include <petsc-private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
10 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
11 {
12   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
13   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
14   PetscErrorCode ierr;
15   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
16   IS             from,to;
17   Vec            gvec;
18   PetscBool      useblockis;
19 #if defined(PETSC_USE_CTABLE)
20   PetscTable         gid1_lid1;
21   PetscTablePosition tpos;
22   PetscInt           gid,lid;
23 #else
24   PetscInt N = mat->cmap->N,*indices;
25 #endif
26 
27   PetscFunctionBegin;
28 #if defined(PETSC_USE_CTABLE)
29   /* use a table */
30   ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
31   for (i=0; i<aij->B->rmap->n; i++) {
32     for (j=0; j<B->ilen[i]; j++) {
33       PetscInt data,gid1 = aj[B->i[i] + j] + 1;
34       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
35       if (!data) {
36         /* one based table */
37         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
38       }
39     }
40   }
41   /* form array of columns we need */
42   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
43   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44   while (tpos) {
45     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46     gid--;
47     lid--;
48     garray[lid] = gid;
49   }
50   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
51   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
52   for (i=0; i<ec; i++) {
53     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
54   }
55   /* compact out the extra columns in B */
56   for (i=0; i<aij->B->rmap->n; i++) {
57     for (j=0; j<B->ilen[i]; j++) {
58       PetscInt gid1 = aj[B->i[i] + j] + 1;
59       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60       lid--;
61       aj[B->i[i] + j] = lid;
62     }
63   }
64   aij->B->cmap->n = aij->B->cmap->N = ec;
65 
66   ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
67   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
68 #else
69   /* Make an array as long as the number of columns */
70   /* mark those columns that are in aij->B */
71   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
72   ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
73   for (i=0; i<aij->B->rmap->n; i++) {
74     for (j=0; j<B->ilen[i]; j++) {
75       if (!indices[aj[B->i[i] + j]]) ec++;
76       indices[aj[B->i[i] + j]] = 1;
77     }
78   }
79 
80   /* form array of columns we need */
81   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
82   ec   = 0;
83   for (i=0; i<N; i++) {
84     if (indices[i]) garray[ec++] = i;
85   }
86 
87   /* make indices now point into garray */
88   for (i=0; i<ec; i++) {
89     indices[garray[i]] = i;
90   }
91 
92   /* compact out the extra columns in B */
93   for (i=0; i<aij->B->rmap->n; i++) {
94     for (j=0; j<B->ilen[i]; j++) {
95       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
96     }
97   }
98   aij->B->cmap->n = aij->B->cmap->N = ec;
99 
100   ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
101   ierr = PetscFree(indices);CHKERRQ(ierr);
102 #endif
103   /* create local vector that is used to scatter into */
104   ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
105 
106   /* create two temporary Index sets for build scatter gather */
107   /*  check for the special case where blocks are communicated for faster VecScatterXXX */
108   useblockis = PETSC_FALSE;
109 #if 0
110   if (mat->cmap->bs > 1) {
111     PetscInt bs = mat->cmap->bs,ibs,ga;
112     if (!(ec % bs)) {
113       useblockis = PETSC_TRUE;
114       for (i=0; i<ec/bs; i++) {
115         if ((ga = garray[ibs = i*bs]) % bs) {
116           useblockis = PETSC_FALSE;
117           break;
118         }
119         for (j=1; j<bs; j++) {
120           if (garray[ibs+j] != ga+j) {
121             useblockis = PETSC_FALSE;
122             break;
123           }
124         }
125         if (!useblockis) break;
126       }
127     }
128   }
129 #if defined(PETSC_USE_DEBUG)
130   i    = (PetscInt)useblockis;
131   ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
132   if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)");
133 #endif
134 #endif
135   if (useblockis) {
136     PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
137     if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
138     ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
139     ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
140     for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
141     ierr = ISCreateBlock(PetscObjectComm((PetscObject)mat),bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
142   } else {
143     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
144   }
145 
146   ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);
147 
148   /* create temporary global vector to generate scatter context */
149   /* This does not allocate the array's memory so is efficient */
150   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
151 
152   /* generate the scatter context */
153   ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
154   ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
155   ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
156   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
157   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
158 
159   aij->garray = garray;
160 
161   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
162   ierr = ISDestroy(&from);CHKERRQ(ierr);
163   ierr = ISDestroy(&to);CHKERRQ(ierr);
164   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
165   PetscFunctionReturn(0);
166 }
167 
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatDisAssemble_MPIAIJ"
171 /*
172      Takes the local part of an already assembled MPIAIJ matrix
173    and disassembles it. This is to allow new nonzeros into the matrix
174    that require more communication in the matrix vector multiply.
175    Thus certain data-structures must be rebuilt.
176 
177    Kind of slow! But that's what application programmers get when
178    they are sloppy.
179 */
180 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
181 {
182   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
183   Mat            B     = aij->B,Bnew;
184   Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
185   PetscErrorCode ierr;
186   PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
187   PetscScalar    v;
188 
189   PetscFunctionBegin;
190   /* free stuff related to matrix-vec multiply */
191   ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
192   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
193   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
194   if (aij->colmap) {
195 #if defined(PETSC_USE_CTABLE)
196     ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
197 #else
198     ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
199     ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
200 #endif
201   }
202 
203   /* make sure that B is assembled so we can access its values */
204   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
206 
207   /* invent new B and copy stuff over */
208   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
209   for (i=0; i<m; i++) {
210     nz[i] = Baij->i[i+1] - Baij->i[i];
211   }
212   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
213   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
214   ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
215   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
216   ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
217 
218   ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
219 
220   ierr = PetscFree(nz);CHKERRQ(ierr);
221   for (i=0; i<m; i++) {
222     for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
223       col  = garray[Baij->j[ct]];
224       v    = Baij->a[ct++];
225       ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
226     }
227   }
228   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
229   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
230   ierr = MatDestroy(&B);CHKERRQ(ierr);
231   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
232 
233   aij->B           = Bnew;
234   A->was_assembled = PETSC_FALSE;
235   PetscFunctionReturn(0);
236 }
237 
238 /*      ugly stuff added for Glenn someday we should fix this up */
239 
240 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
241 static Vec auglydd          = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */
242 
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
246 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
247 {
248   Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */
249   PetscErrorCode ierr;
250   PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
251   PetscInt       *r_rmapd,*r_rmapo;
252 
253   PetscFunctionBegin;
254   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
255   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
256   ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
257   ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
258   nt   = 0;
259   for (i=0; i<inA->rmap->mapping->n; i++) {
260     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
261       nt++;
262       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
263     }
264   }
265   if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
266   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
267   for (i=0; i<inA->rmap->mapping->n; i++) {
268     if (r_rmapd[i]) {
269       auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
270     }
271   }
272   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
273   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);
274 
275   ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
276   ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
277   for (i=0; i<ina->B->cmap->n; i++) {
278     lindices[garray[i]] = i+1;
279   }
280   no   = inA->rmap->mapping->n - nt;
281   ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
282   ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
283   nt   = 0;
284   for (i=0; i<inA->rmap->mapping->n; i++) {
285     if (lindices[inA->rmap->mapping->indices[i]]) {
286       nt++;
287       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
288     }
289   }
290   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
291   ierr = PetscFree(lindices);CHKERRQ(ierr);
292   ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
293   for (i=0; i<inA->rmap->mapping->n; i++) {
294     if (r_rmapo[i]) {
295       auglyrmapo[(r_rmapo[i]-1)] = i;
296     }
297   }
298   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
299   ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
305 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
306 {
307   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
308   PetscErrorCode ierr;
309 
310   PetscFunctionBegin;
311   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 EXTERN_C_BEGIN
316 #undef __FUNCT__
317 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
318 PetscErrorCode  MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
319 {
320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */
321   PetscErrorCode ierr;
322   PetscInt       n,i;
323   PetscScalar    *d,*o,*s;
324 
325   PetscFunctionBegin;
326   if (!auglyrmapd) {
327     ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
328   }
329 
330   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
331 
332   ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
333   ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
334   for (i=0; i<n; i++) {
335     d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
336   }
337   ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
338   /* column scale "diagonal" portion of local matrix */
339   ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);
340 
341   ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
342   ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
343   for (i=0; i<n; i++) {
344     o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
345   }
346   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
347   ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
348   /* column scale "off-diagonal" portion of local matrix */
349   ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 EXTERN_C_END
353 
354 
355