xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
11   PetscInt       Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray;
12   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
13   IS             from,to;
14   Vec            gvec;
15   PetscMPIInt    rank = sbaij->rank,lsize;
16   PetscInt       *owners = sbaij->rangebs,*ec_owner,k;
17   const PetscInt *sowners;
18   PetscScalar    *ptr;
19 #if defined(PETSC_USE_CTABLE)
20   PetscTable         gid1_lid1; /* one-based gid to lid table */
21   PetscTablePosition tpos;
22   PetscInt           gid,lid;
23 #else
24   PetscInt           *indices;
25 #endif
26 
27   PetscFunctionBegin;
28 #if defined(PETSC_USE_CTABLE)
29   CHKERRQ(PetscTableCreate(mbs,Nbs+1,&gid1_lid1));
30   for (i=0; i<B->mbs; i++) {
31     for (j=0; j<B->ilen[i]; j++) {
32       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
33       CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data));
34       if (!data) CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
35     }
36   }
37   /* form array of columns we need */
38   CHKERRQ(PetscMalloc1(ec,&garray));
39   CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos));
40   while (tpos) {
41     CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
42     gid--; lid--;
43     garray[lid] = gid;
44   }
45   CHKERRQ(PetscSortInt(ec,garray));
46   CHKERRQ(PetscTableRemoveAll(gid1_lid1));
47   for (i=0; i<ec; i++) CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
48   /* compact out the extra columns in B */
49   for (i=0; i<B->mbs; i++) {
50     for (j=0; j<B->ilen[i]; j++) {
51       PetscInt gid1 = aj[B->i[i] + j] + 1;
52       CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid));
53       lid--;
54       aj[B->i[i]+j] = lid;
55     }
56   }
57   CHKERRQ(PetscTableDestroy(&gid1_lid1));
58   CHKERRQ(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner));
59   for (i=j=0; i<ec; i++) {
60     while (garray[i]>=owners[j+1]) j++;
61     ec_owner[i] = j;
62   }
63 #else
64   /* For the first stab we make an array as long as the number of columns */
65   /* mark those columns that are in sbaij->B */
66   CHKERRQ(PetscCalloc1(Nbs,&indices));
67   for (i=0; i<mbs; i++) {
68     for (j=0; j<B->ilen[i]; j++) {
69       if (!indices[aj[B->i[i] + j]]) ec++;
70       indices[aj[B->i[i] + j]] = 1;
71     }
72   }
73 
74   /* form arrays of columns we need */
75   CHKERRQ(PetscMalloc1(ec,&garray));
76   CHKERRQ(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner));
77 
78   ec = 0;
79   for (j=0; j<sbaij->size; j++) {
80     for (i=owners[j]; i<owners[j+1]; i++) {
81       if (indices[i]) {
82         garray[ec]   = i;
83         ec_owner[ec] = j;
84         ec++;
85       }
86     }
87   }
88 
89   /* make indices now point into garray */
90   for (i=0; i<ec; i++) indices[garray[i]] = i;
91 
92   /* compact out the extra columns in B */
93   for (i=0; i<mbs; i++) {
94     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
95   }
96   CHKERRQ(PetscFree(indices));
97 #endif
98   B->nbs = ec;
99   CHKERRQ(PetscLayoutDestroy(&sbaij->B->cmap));
100   CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap));
101 
102   CHKERRQ(VecScatterDestroy(&sbaij->sMvctx));
103   /* create local vector that is used to scatter into */
104   CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec));
105 
106   /* create two temporary index sets for building scatter-gather */
107   CHKERRQ(PetscMalloc1(2*ec,&stmp));
108   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from));
109   for (i=0; i<ec; i++) stmp[i] = i;
110   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to));
111 
112   /* generate the scatter context
113      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
114   CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));
115   CHKERRQ(VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx));
116   CHKERRQ(VecDestroy(&gvec));
117 
118   sbaij->garray = garray;
119 
120   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx));
121   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec));
122   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
123   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));
124 
125   CHKERRQ(ISDestroy(&from));
126   CHKERRQ(ISDestroy(&to));
127 
128   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
129   lsize = (mbs + ec)*bs;
130   CHKERRQ(VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0));
131   CHKERRQ(VecDuplicate(sbaij->slvec0,&sbaij->slvec1));
132   CHKERRQ(VecGetSize(sbaij->slvec0,&vec_size));
133 
134   CHKERRQ(VecGetOwnershipRanges(sbaij->slvec0,&sowners));
135 
136   /* x index in the IS sfrom */
137   for (i=0; i<ec; i++) {
138     j          = ec_owner[i];
139     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
140   }
141   /* b index in the IS sfrom */
142   k = sowners[rank]/bs + mbs;
143   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
144   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from));
145 
146   /* x index in the IS sto */
147   k = sowners[rank]/bs + mbs;
148   for (i=0; i<ec; i++) stmp[i] = (k + i);
149   /* b index in the IS sto */
150   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
151 
152   CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to));
153 
154   CHKERRQ(VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx));
155   CHKERRQ(VecScatterViewFromOptions(sbaij->sMvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
156 
157   CHKERRQ(VecGetLocalSize(sbaij->slvec1,&nt));
158   CHKERRQ(VecGetArray(sbaij->slvec1,&ptr));
159   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a));
160   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b));
161   CHKERRQ(VecRestoreArray(sbaij->slvec1,&ptr));
162 
163   CHKERRQ(VecGetArray(sbaij->slvec0,&ptr));
164   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b));
165   CHKERRQ(VecRestoreArray(sbaij->slvec0,&ptr));
166 
167   CHKERRQ(PetscFree(stmp));
168 
169   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx));
170   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0));
171   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1));
172   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b));
173   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a));
174   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b));
175   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
176   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));
177 
178   CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
179   CHKERRQ(ISDestroy(&from));
180   CHKERRQ(ISDestroy(&to));
181   CHKERRQ(PetscFree2(sgarray,ec_owner));
182   PetscFunctionReturn(0);
183 }
184 
185 /*
186      Takes the local part of an already assembled MPISBAIJ matrix
187    and disassembles it. This is to allow new nonzeros into the matrix
188    that require more communication in the matrix vector multiply.
189    Thus certain data-structures must be rebuilt.
190 
191    Kind of slow! But that's what application programmers get when
192    they are sloppy.
193 */
194 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
195 {
196   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
197   Mat            B      = baij->B,Bnew;
198   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
199   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
200   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
201   MatScalar      *a = Bbaij->a;
202   PetscScalar    *atmp;
203 #if defined(PETSC_USE_REAL_MAT_SINGLE)
204   PetscInt l;
205 #endif
206 
207   PetscFunctionBegin;
208 #if defined(PETSC_USE_REAL_MAT_SINGLE)
209   CHKERRQ(PetscMalloc1(A->rmap->bs,&atmp));
210 #endif
211   /* free stuff related to matrix-vec multiply */
212   CHKERRQ(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
213   CHKERRQ(VecDestroy(&baij->lvec));
214   CHKERRQ(VecScatterDestroy(&baij->Mvctx));
215 
216   CHKERRQ(VecDestroy(&baij->slvec0));
217   CHKERRQ(VecDestroy(&baij->slvec0b));
218   CHKERRQ(VecDestroy(&baij->slvec1));
219   CHKERRQ(VecDestroy(&baij->slvec1a));
220   CHKERRQ(VecDestroy(&baij->slvec1b));
221 
222   if (baij->colmap) {
223 #if defined(PETSC_USE_CTABLE)
224     CHKERRQ(PetscTableDestroy(&baij->colmap));
225 #else
226     CHKERRQ(PetscFree(baij->colmap));
227     CHKERRQ(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt)));
228 #endif
229   }
230 
231   /* make sure that B is assembled so we can access its values */
232   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
233   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
234 
235   /* invent new B and copy stuff over */
236   CHKERRQ(PetscMalloc1(mbs,&nz));
237   for (i=0; i<mbs; i++) {
238     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
239   }
240   CHKERRQ(MatCreate(PETSC_COMM_SELF,&Bnew));
241   CHKERRQ(MatSetSizes(Bnew,m,n,m,n));
242   CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name));
243   CHKERRQ(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz));
244   CHKERRQ(PetscFree(nz));
245 
246   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
247     ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
248   }
249 
250   /*
251    Ensure that B's nonzerostate is monotonically increasing.
252    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
253    */
254   Bnew->nonzerostate = B->nonzerostate;
255   CHKERRQ(PetscMalloc1(bs,&rvals));
256   for (i=0; i<mbs; i++) {
257     rvals[0] = bs*i;
258     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
259     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
260       col = garray[Bbaij->j[j]]*bs;
261       for (k=0; k<bs; k++) {
262 #if defined(PETSC_USE_REAL_MAT_SINGLE)
263         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
264 #else
265         atmp = a+j*bs2 + k*bs;
266 #endif
267         CHKERRQ(MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode));
268         col++;
269       }
270     }
271   }
272 #if defined(PETSC_USE_REAL_MAT_SINGLE)
273   CHKERRQ(PetscFree(atmp));
274 #endif
275   CHKERRQ(PetscFree(baij->garray));
276 
277   baij->garray = NULL;
278 
279   CHKERRQ(PetscFree(rvals));
280   CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
281   CHKERRQ(MatDestroy(&B));
282   CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));
283 
284   baij->B = Bnew;
285 
286   A->was_assembled = PETSC_FALSE;
287   PetscFunctionReturn(0);
288 }
289