xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision ffa8c5705e8ab2cf85ee1d14dbe507a6e2eb5283)

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

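/*
   MatSetUpMultiply_MPISBAIJ - builds the communication machinery used by the
   parallel SBAIJ matrix-vector product:
     1) collects in garray the global block columns of the off-diagonal part B
        that are actually referenced, and renumbers B's column indices to be
        local to that compacted set;
     2) creates the scatter Mvctx and work vector lvec (not used by
        MatMult_MPISBAIJ() itself, but by routines such as MatDiagonalScale_MPISBAIJ);
     3) creates the combined scatter sMvctx together with the work vectors
        slvec0/slvec1 and their sub-vector views slvec0b, slvec1a, slvec1b.
*/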
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscInt       Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank = sbaij->rank,lsize;
  PetscInt       *owners = sbaij->rangebs,*ec_owner,k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           *indices;
#endif

  PetscFunctionBegin;
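  /* Determine which global block columns appear in the off-diagonal part B.
     With PETSC_USE_CTABLE this uses a hash table keyed by gid+1, so that a
     lookup result of 0 means "not seen yet"; otherwise a dense marker array
     of length Nbs is used. */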
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(mbs,Nbs+1,&gid1_lid1));
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
      if (!data) PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
    gid--; lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec,garray));
  /* rebuild the table so that the lids reflect the sorted order of the gids */
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i=0; i<ec; i++) PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  PetscCall(PetscTableDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner));
  /* ec_owner[i] = rank of the process that owns global block column garray[i] */
  for (i=j=0; i<ec; i++) {
    while (garray[i]>=owners[j+1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns and
     mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs,&indices));
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  PetscCall(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner));

  ec = 0;
  for (j=0; j<sbaij->size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
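
  /* At this point garray[lid] is the global block column of local ghost column lid,
     ec_owner[lid] is the rank that owns it, and B's column indices have been
     rewritten in terms of the ec compacted local columns; shrink B's column
     layout accordingly */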
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2*ec,&stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from));
  for (i=0; i<ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));
  PetscCall(VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0,&sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0,&vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0,&sowners));
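
  /* slvec0 and slvec1 share a layout in which each process owns its mbs*bs x entries
     followed by its ec*bs b (ghost) entries.  Thus sowners[j]/bs, rank j's first
     global block in these vectors, exceeds owners[j], rank j's first block row of
     the matrix, by the number of b blocks on the lower ranks; sgarray below shifts
     each garray entry by exactly that offset to locate it inside slvec0/slvec1 */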
  /* sources of the x values: each ghost block column, at its owner's x position in slvec0 */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* sources of the b values: this process's own b slots in slvec0 */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from));

  /* destinations of the x values: this process's b slots in slvec1 */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);
  /* destinations of the b values: the ghost columns' x positions on their owner ranks */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to));

  PetscCall(VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
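
  /* A single forward scatter of slvec0 into slvec1 therefore moves both kinds of
     data at once: ghost x values travel from their owner's x region into this
     process's b slots, while the locally accumulated b values travel back into
     their owner's x region.  (MatMult_MPISBAIJ() performs this scatter with
     ADD_VALUES, combining the gather of ghost values and the return of transpose
     contributions in one communication step.)  Next, sequential sub-vectors are
     created that share the arrays of slvec0/slvec1 and view their x and b parts. */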
  PetscCall(VecGetLocalSize(sbaij->slvec1,&nt));
  PetscCall(VecGetArray(sbaij->slvec1,&ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1,&ptr));

  PetscCall(VecGetArray(sbaij->slvec0,&ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0,&ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray,ec_owner));
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs,&atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs,&nz));
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF,&Bnew));
  PetscCall(MatSetSizes(Bnew,m,n,m,n));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
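
  /* copy B's values into Bnew one bs x 1 column at a time, translating each
     compacted local block column Bbaij->j[j] back to its global position via garray */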
  PetscCall(PetscMalloc1(bs,&rvals));
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*i;
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]]*bs;
      for (k=0; k<bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l=0; l<bs; l++) atmp[l] = a[j*bs2 + k*bs + l]; /* copy column k of block j, matching the pointer arithmetic below */
#else
        atmp = a+j*bs2 + k*bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
289