xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision d875aeafb16f00a1010d1b59f3666f78a04e796e)
1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication data structures needed
   for the parallel block matrix-vector product.

   Determines which global block columns are actually referenced by the
   off-diagonal part B, compacts B's column indices down to a dense local
   numbering, records the local-to-global map in baij->garray, and creates
   the ghost vector (baij->lvec) plus the scatter (baij->Mvctx) that fills it.

   Input Parameter:
.  mat - the MPIBAIJ matrix (its off-diagonal part baij->B must already hold
         global block-column indices in B->j)
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Hash-table variant: avoids an O(Nbs) array when the global number of
     block columns is large.  The table is 1-based so that a lookup result
     of 0 unambiguously means "not present". */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;  /* undo the 1-based shift used inside the table */
    garray[lid] = gid;
  }
  /* sort so local ghost ordering is by increasing global block index, then
     rebuild the table to map (sorted) global id -> compacted local id */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite B->j in place from global
     block-column indices to the new local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  /* B now has only ec (block) columns; update its column layout to match */
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* count each block column once */
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;  /* garray comes out sorted because i is increasing */
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  /* B now has only ec (block) columns; update its column layout to match */
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N  = ec*mat->rmap->bs;

  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* NULL array: only the layout of gvec matters for building the scatter */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  /* ownership of garray transfers to the matrix; freed on disassembly/destroy */
  baij->garray = garray;

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 /*
134      Takes the local part of an already assembled MPIBAIJ matrix
135    and disassembles it. This is to allow new nonzeros into the matrix
136    that require more communication in the matrix vector multiply.
137    Thus certain data-structures must be rebuilt.
138 
139    Kind of slow! But that's what application programmers get when
140    they are sloppy.
141 */
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a  = Bbaij->a;
  MatScalar      *atmp;


  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  /* preallocate Bnew with exactly the per-row block counts of the old B */
  ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
  /* Bnew gets the FULL global column width n = A->cmap->N, since its column
     indices revert to global numbering (that is the point of disassembly) */
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
  }

  /* values in Bbaij->a are stored column-major within each bs2 block */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  /* copy each block, translating local column indices back to global via garray */
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  /* all values copied out above, so the old B (and a pointing into it) may go */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  /* mark A unassembled; MatSetUpMultiply_MPIBAIJ will be rerun at next assembly */
  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
212 
/*      ugly stuff added for Glenn someday we should fix this up */

/* NOTE(review): file-scope state built lazily by MatMPIBAIJDiagonalScaleLocalSetUp();
   nothing in this file appears to free it — confirm whether cleanup happens elsewhere */
static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd     = 0,uglyoo     = 0;  /* work vectors used to scale the two parts of the local matrix */
217 
218 
219 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
220 {
221   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
222   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
223   PetscErrorCode ierr;
224   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
225   PetscInt       *r_rmapd,*r_rmapo;
226 
227   PetscFunctionBegin;
228   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
229   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
230   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
231   nt   = 0;
232   for (i=0; i<inA->rmap->mapping->n; i++) {
233     if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
234       nt++;
235       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
236     }
237   }
238   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
239   ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr);
240   for (i=0; i<inA->rmap->mapping->n; i++) {
241     if (r_rmapd[i]) {
242       for (j=0; j<bs; j++) {
243         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
244       }
245     }
246   }
247   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
248   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
249 
250   ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr);
251   for (i=0; i<B->nbs; i++) {
252     lindices[garray[i]] = i+1;
253   }
254   no   = inA->rmap->mapping->n - nt;
255   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
256   nt   = 0;
257   for (i=0; i<inA->rmap->mapping->n; i++) {
258     if (lindices[inA->rmap->mapping->indices[i]]) {
259       nt++;
260       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
261     }
262   }
263   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
264   ierr = PetscFree(lindices);CHKERRQ(ierr);
265   ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr);
266   for (i=0; i<inA->rmap->mapping->n; i++) {
267     if (r_rmapo[i]) {
268       for (j=0; j<bs; j++) {
269         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
270       }
271     }
272   }
273   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
274   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
279 {
280   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
281   PetscErrorCode ierr;
282 
283   PetscFunctionBegin;
284   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
289 {
290   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
291   PetscErrorCode    ierr;
292   PetscInt          n,i;
293   PetscScalar       *d,*o;
294   const PetscScalar *s;
295 
296   PetscFunctionBegin;
297   if (!uglyrmapd) {
298     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
299   }
300 
301   ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);
302 
303   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
304   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
305   for (i=0; i<n; i++) {
306     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
307   }
308   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
309   /* column scale "diagonal" portion of local matrix */
310   ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
311 
312   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
313   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
314   for (i=0; i<n; i++) {
315     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
316   }
317   ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
318   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
319   /* column scale "off-diagonal" portion of local matrix */
320   ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323