xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
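/*
   Overview (summarized from the code below): MatSetUpMultiply_MPISBAIJ() builds the
   compacted ghost-column map garray[], the sequential ghost vector lvec and its
   scatter Mvctx, and the merged work vectors slvec0/slvec1 with the scatter sMvctx
   used by the parallel multiply, so the gather of ghost x values and the exchange
   of off-process y contributions can travel in a single scatter.

   A minimal usage sketch that reaches this routine (hedged: the sizes and
   preallocation numbers below are made up; the calls are standard PETSc API, and
   the setup is normally triggered from matrix assembly):

     Mat A;
     Vec x, y;
     PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A));
     .. insert blocks in the upper triangle with MatSetValuesBlocked() ..
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreateVecs(A, &x, &y));
     PetscCall(MatMult(A, x, y));   .. uses sMvctx, slvec0, slvec1 ..
*/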
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank   = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
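  /* Worked example (hypothetical data): if B references global block columns
     {3, 8, 11}, then ec = 3 and garray = {3, 8, 11}; the compaction below rewrites
     B's column indices as 3 -> 0, 8 -> 1, 11 -> 2, so B becomes ec block columns wide */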
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscTableDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
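  /* ISCreateBlock() takes block indices: with block size bs, block index g stands for
     the point indices g*bs .. g*bs+bs-1. So "from" names the ghost block columns of
     the global vector and "to" names blocks 0 .. ec-1 of lvec */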

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));
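  /* gvec supplies only the parallel layout of a compatible global vector (its array
     is NULL); VecScatterCreate() needs just the layouts and indices, never the values */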

  sbaij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
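  /* Layout of slvec0/slvec1, implied by the index arithmetic below: each process
     contributes its mbs owned blocks followed by its ec ghost slots, so process p
     owns point range [sowners[p], sowners[p+1]) of size (mbs_p + ec_p)*bs.
     sgarray[] maps a ghost block column garray[i] owned by process j into this
     merged numbering: local block (garray[i] - owners[j]) on j, offset by j's first
     slvec block sowners[j]/bs */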

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
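  /* slvec1a/slvec1b and slvec0b are zero-copy views: VecCreateSeqWithArray() wraps
     the x part (first bs*mbs entries) and the ghost part (the remainder) of the
     parent vector's own storage, so scattering into slvec1 updates both views in place */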

  PetscCall(PetscFree(stmp));

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->sMvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0b));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1a));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1b));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(0);
}

/*
   Takes the local (off-process) part of an already assembled MPISBAIJ matrix
   and disassembles it, converting its compacted column indices back to global
   indices. This allows new nonzeros that require more communication in the
   matrix-vector multiply to enter the matrix; the affected data structures are
   rebuilt at the next assembly.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
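/*
   A hypothetical trigger (a sketch, not taken from this file): after assembly,
   inserting a value at a column this process has not previously communicated
   forces a disassemble before the insertion can proceed, e.g.

     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatSetValue(A, row, new_off_process_col, v, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

   where row, new_off_process_col, and v are placeholders.
*/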
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, ec, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -Bbaij->nbs * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) { nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; }
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion behavior options (nonnegative values only; negative values request insertion errors and are not carried over) */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
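  /* Copy B into Bnew block by block: rvals[] holds the bs point rows of block row i,
     garray[] recovers the global point column of each compacted block column, and
     each pass over k inserts one point column (bs values) of the bs x bs block */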
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l]; /* promote column k of block j to PetscScalar */
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}