/*
   Support for the parallel SBAIJ matrix-vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
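
/*
   Note: MatSetUpMultiply_MPISBAIJ() is an internal helper invoked at the end of
   matrix assembly to build the scatter machinery for the matrix-vector product;
   MatDisAssemble_MPISBAIJ() undoes part of that work when new off-process
   nonzeros force the communication pattern to be rebuilt.
*/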

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank   = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
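  /* count the distinct block columns referenced by the off-diagonal part B:
     each global block column id (stored one-based in the hash map) is assigned
     the next local id the first time it is seen, so ec ends up as the number
     of nonlocal block columns this process needs */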
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
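  /* the hash map hands back the needed global block columns in arbitrary order;
     sort them so the compacted column indices of B are monotone, then rebuild
     the gid -> lid map to match the sorted ordering */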
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
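  /* record the owning rank of each needed block column; owners[] (= sbaij->rangebs)
     holds the block-row ownership ranges and garray is sorted, so a single sweep
     with a moving rank pointer j suffices */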
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of block columns */
  /* and mark those block columns that appear in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

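  /* sweep the marker array rank by rank: because the ownership ranges are
     contiguous and increasing, this fills garray in sorted order and records
     each column's owning rank in ec_owner in the same pass */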
  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
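  /* B now has only ec block columns, so shrink its column layout to match */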
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
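  /* slvec0 and slvec1 have (mbs + ec)*bs local entries: the first mbs*bs mirror
     this process's part of the vector, the trailing ec*bs hold the ghost block
     columns; the sub-vectors slvec?a/slvec?b created below alias the two halves */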
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

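  /* build one combined scatter for the symmetric multiply: the first ec block
     entries move ghost x values from their owners into the tail of slvec1 on
     this process, while the second ec block entries send the locally computed
     off-diagonal contributions (the tail of slvec0) back to the owners' b parts */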
  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

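  /* expose the two halves of slvec1 (local b part and ghost part) and the ghost
     half of slvec0 as sequential vectors that share the parent's array, so the
     multiply kernels can address them separately without copying */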
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
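
/*
   A sketch of when disassembly happens (illustrative, not code from this file):
   once the matrix has been assembled, an insertion such as

     PetscCall(MatSetValue(A, row, col_owned_by_another_rank, v, ADD_VALUES));

   may hit an off-diagonal block that the current communication pattern does not
   cover; the MatSetValues() path then calls this routine, and the next assembly
   rebuilds the scatters via MatSetUpMultiply_MPISBAIJ().
*/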
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
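  /* copy B's blocks into Bnew, translating each compacted local block column
     back to its global block column through garray; rvals holds the bs row
     indices of block row i */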
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l]; /* copy column k of block j, widening MatScalar to PetscScalar */
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}