xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision e91c6855ec8eedcfe894fb1dc8d9ee5acd2335f4)
1 #define PETSCMAT_DLL
2 
3 /*
4    Support for the parallel BAIJ matrix vector multiply
5 */
6 #include "../src/mat/impls/baij/mpi/mpibaij.h"
7 
8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
9 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
/*
   MatSetUpMultiply_MPIBAIJ - Sets up the communication structures needed for the
   parallel BAIJ matrix-vector product.

   It (1) collects the distinct global block-column indices referenced by the
   off-diagonal part baij->B into the sorted array garray, (2) renumbers B's
   column indices into the compact local range [0,ec), (3) creates the
   sequential ghost vector baij->lvec of length ec*bs, and (4) builds the
   VecScatter baij->Mvctx that gathers the needed off-process values from a
   distributed vector into lvec.

   Input Parameter:
.  mat - the MPIBAIJ matrix; its off-diagonal part B must already contain its
         nonzero structure (aj/ilen are read here)
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec counts the distinct ghost block columns */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable     gid1_lid1;                       /* 1-based map: global block column -> local block column */
  PetscTablePosition tpos;
  PetscInt       gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Hash-table variant: memory scales with the number of ghost columns rather
     than with the global number of block columns Nbs (see the #else branch) */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;     /* +1 because the table cannot store key/value 0 */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                                 /* undo the 1-based shift */
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so local indices follow the sorted global order */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;                        /* B now uses local column numbering */
    }
  }
  B->nbs     = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;                           /* garray comes out sorted by construction */
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap->n =baij->B->cmap->N  = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  /* stmp's ownership passes to the IS (PETSC_OWN_POINTER) -- do not free it here */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* no array is supplied (PETSC_NULL): only the parallel layout is needed */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;                          /* matrix takes ownership of garray */
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
135 
136 /*
137      Takes the local part of an already assembled MPIBAIJ matrix
138    and disassembles it. This is to allow new nonzeros into the matrix
139    that require more communication in the matrix vector multiply.
140    Thus certain data-structures must be rebuilt.
141 
142    Kind of slow! But that's what application programmers get when
143    they are sloppy.
144 */
145 #undef __FUNCT__
146 #define __FUNCT__ "DisAssemble_MPIBAIJ"
147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
148 {
149   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
150   Mat            B = baij->B,Bnew;
151   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
152   PetscErrorCode ierr;
153   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
154   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
155   MatScalar      *a = Bbaij->a;
156   MatScalar      *atmp;
157 
158 
159   PetscFunctionBegin;
160   /* free stuff related to matrix-vec multiply */
161   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
162   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
163   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
164   if (baij->colmap) {
165 #if defined (PETSC_USE_CTABLE)
166     ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
167 #else
168     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
169     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
170 #endif
171   }
172 
173   /* make sure that B is assembled so we can access its values */
174   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
175   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
176 
177   /* invent new B and copy stuff over */
178   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
179   for (i=0; i<mbs; i++) {
180     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
181   }
182   ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr);
183   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
184   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
185   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
186   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
187 
188   for (i=0; i<mbs; i++) {
189     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
190       col  = garray[Bbaij->j[j]];
191       atmp = a + j*bs2;
192       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
193     }
194   }
195   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
196 
197   ierr = PetscFree(nz);CHKERRQ(ierr);
198   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = MatDestroy(B);CHKERRQ(ierr);
201   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
202   baij->B = Bnew;
203   A->was_assembled = PETSC_FALSE;
204   A->assembled     = PETSC_FALSE;
205   PetscFunctionReturn(0);
206 }
207 
/*      ugly stuff added for Glenn; someday we should fix this up */
209 
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                      parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
/* NOTE(review): this file-scope state is built once by
   MatMPIBAIJDiagonalScaleLocalSetUp() (guarded by "if (!uglyrmapd)") and is
   never freed here, so it appears to support only one matrix per process --
   confirm before reusing with multiple matrices */
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
217 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
218 {
219   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
220   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
221   PetscErrorCode ierr;
222   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
223   PetscInt       *r_rmapd,*r_rmapo;
224 
225   PetscFunctionBegin;
226   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
227   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
228   ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
229   ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
230   nt   = 0;
231   for (i=0; i<inA->rbmapping->n; i++) {
232     if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) {
233       nt++;
234       r_rmapd[i] = inA->rbmapping->indices[i] + 1;
235     }
236   }
237   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
238   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
239   for (i=0; i<inA->rbmapping->n; i++) {
240     if (r_rmapd[i]){
241       for (j=0; j<bs; j++) {
242         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
243       }
244     }
245   }
246   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
247   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
248 
249   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
250   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
251   for (i=0; i<B->nbs; i++) {
252     lindices[garray[i]] = i+1;
253   }
254   no   = inA->rbmapping->n - nt;
255   ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
256   ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
257   nt   = 0;
258   for (i=0; i<inA->rbmapping->n; i++) {
259     if (lindices[inA->rbmapping->indices[i]]) {
260       nt++;
261       r_rmapo[i] = lindices[inA->rbmapping->indices[i]];
262     }
263   }
264   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
265   ierr = PetscFree(lindices);CHKERRQ(ierr);
266   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
267   for (i=0; i<inA->rbmapping->n; i++) {
268     if (r_rmapo[i]){
269       for (j=0; j<bs; j++) {
270         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
271       }
272     }
273   }
274   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
275   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
276 
277   PetscFunctionReturn(0);
278 }
279 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
/*
   MatMPIBAIJDiagonalScaleLocal - Thin dispatcher: invokes the matrix's
   "MatDiagonalScaleLocal_C" method (if the type provides one) with (A,scale).
   PetscTryMethod is a no-op when the method is not registered.
*/
PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
291 
292 EXTERN_C_BEGIN
293 #undef __FUNCT__
294 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
295 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
296 {
297   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
298   PetscErrorCode ierr;
299   PetscInt       n,i;
300   PetscScalar    *d,*o,*s;
301 
302   PetscFunctionBegin;
303   if (!uglyrmapd) {
304     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
305   }
306 
307   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
308 
309   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
310   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
311   for (i=0; i<n; i++) {
312     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
313   }
314   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
315   /* column scale "diagonal" portion of local matrix */
316   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
317 
318   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
319   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
320   for (i=0; i<n; i++) {
321     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
322   }
323   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
324   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
325   /* column scale "off-diagonal" portion of local matrix */
326   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
327 
328   PetscFunctionReturn(0);
329 }
330 EXTERN_C_END
331 
332 
333