xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision c748140286bc7db2f79d09b77ad2f995cbbeb47c)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
11   PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
12   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
13   IS              from, to;
14   Vec             gvec;
15   PetscMPIInt     rank   = sbaij->rank, lsize;
16   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
17   const PetscInt *sowners;
18   PetscScalar    *ptr;
19 #if defined(PETSC_USE_CTABLE)
20   PetscTable         gid1_lid1; /* one-based gid to lid table */
21   PetscTablePosition tpos;
22   PetscInt           gid, lid;
23 #else
24   PetscInt *indices;
25 #endif
26 
27   PetscFunctionBegin;
28 #if defined(PETSC_USE_CTABLE)
29   PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
30   for (i = 0; i < B->mbs; i++) {
31     for (j = 0; j < B->ilen[i]; j++) {
32       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
33       PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
34       if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
35     }
36   }
37   /* form array of columns we need */
38   PetscCall(PetscMalloc1(ec, &garray));
39   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
40   while (tpos) {
41     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
42     gid--;
43     lid--;
44     garray[lid] = gid;
45   }
46   PetscCall(PetscSortInt(ec, garray));
47   PetscCall(PetscTableRemoveAll(gid1_lid1));
48   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
49   /* compact out the extra columns in B */
50   for (i = 0; i < B->mbs; i++) {
51     for (j = 0; j < B->ilen[i]; j++) {
52       PetscInt gid1 = aj[B->i[i] + j] + 1;
53       PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
54       lid--;
55       aj[B->i[i] + j] = lid;
56     }
57   }
58   PetscCall(PetscTableDestroy(&gid1_lid1));
59   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
60   for (i = j = 0; i < ec; i++) {
61     while (garray[i] >= owners[j + 1]) j++;
62     ec_owner[i] = j;
63   }
64 #else
65   /* For the first stab we make an array as long as the number of columns */
66   /* mark those columns that are in sbaij->B */
67   PetscCall(PetscCalloc1(Nbs, &indices));
68   for (i = 0; i < mbs; i++) {
69     for (j = 0; j < B->ilen[i]; j++) {
70       if (!indices[aj[B->i[i] + j]]) ec++;
71       indices[aj[B->i[i] + j]] = 1;
72     }
73   }
74 
75   /* form arrays of columns we need */
76   PetscCall(PetscMalloc1(ec, &garray));
77   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
78 
79   ec = 0;
80   for (j = 0; j < sbaij->size; j++) {
81     for (i = owners[j]; i < owners[j + 1]; i++) {
82       if (indices[i]) {
83         garray[ec]   = i;
84         ec_owner[ec] = j;
85         ec++;
86       }
87     }
88   }
89 
90   /* make indices now point into garray */
91   for (i = 0; i < ec; i++) indices[garray[i]] = i;
92 
93   /* compact out the extra columns in B */
94   for (i = 0; i < mbs; i++) {
95     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
96   }
97   PetscCall(PetscFree(indices));
98 #endif
99   B->nbs = ec;
100   PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
101   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
102 
103   PetscCall(VecScatterDestroy(&sbaij->sMvctx));
104   /* create local vector that is used to scatter into */
105   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
106 
107   /* create two temporary index sets for building scatter-gather */
108   PetscCall(PetscMalloc1(2 * ec, &stmp));
109   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
110   for (i = 0; i < ec; i++) stmp[i] = i;
111   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
112 
113   /* generate the scatter context
114      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
115   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
116   PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
117   PetscCall(VecDestroy(&gvec));
118 
119   sbaij->garray = garray;
120 
121   PetscCall(ISDestroy(&from));
122   PetscCall(ISDestroy(&to));
123 
124   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
125   lsize = (mbs + ec) * bs;
126   PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
127   PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
128   PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
129 
130   PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
131 
132   /* x index in the IS sfrom */
133   for (i = 0; i < ec; i++) {
134     j          = ec_owner[i];
135     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
136   }
137   /* b index in the IS sfrom */
138   k = sowners[rank] / bs + mbs;
139   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
140   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
141 
142   /* x index in the IS sto */
143   k = sowners[rank] / bs + mbs;
144   for (i = 0; i < ec; i++) stmp[i] = (k + i);
145   /* b index in the IS sto */
146   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
147 
148   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
149 
150   PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
151   PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
152 
153   PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
154   PetscCall(VecGetArray(sbaij->slvec1, &ptr));
155   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
156   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
157   PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
158 
159   PetscCall(VecGetArray(sbaij->slvec0, &ptr));
160   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
161   PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
162 
163   PetscCall(PetscFree(stmp));
164 
165   PetscCall(ISDestroy(&from));
166   PetscCall(ISDestroy(&to));
167   PetscCall(PetscFree2(sgarray, ec_owner));
168   PetscFunctionReturn(0);
169 }
170 
171 /*
172      Takes the local part of an already assembled MPISBAIJ matrix
173    and disassembles it. This is to allow new nonzeros into the matrix
174    that require more communication in the matrix vector multiply.
175    Thus certain data-structures must be rebuilt.
176 
177    Kind of slow! But that's what application programmers get when
178    they are sloppy.
179 */
180 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
181 {
182   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
183   Mat           B     = baij->B, Bnew;
184   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
185   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
186   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
187   MatScalar    *a = Bbaij->a;
188   PetscScalar  *atmp;
189 #if defined(PETSC_USE_REAL_MAT_SINGLE)
190   PetscInt l;
191 #endif
192 
193   PetscFunctionBegin;
194 #if defined(PETSC_USE_REAL_MAT_SINGLE)
195   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
196 #endif
197   /* free stuff related to matrix-vec multiply */
198   PetscCall(VecDestroy(&baij->lvec));
199   PetscCall(VecScatterDestroy(&baij->Mvctx));
200 
201   PetscCall(VecDestroy(&baij->slvec0));
202   PetscCall(VecDestroy(&baij->slvec0b));
203   PetscCall(VecDestroy(&baij->slvec1));
204   PetscCall(VecDestroy(&baij->slvec1a));
205   PetscCall(VecDestroy(&baij->slvec1b));
206 
207   if (baij->colmap) {
208 #if defined(PETSC_USE_CTABLE)
209     PetscCall(PetscTableDestroy(&baij->colmap));
210 #else
211     PetscCall(PetscFree(baij->colmap));
212 #endif
213   }
214 
215   /* make sure that B is assembled so we can access its values */
216   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
217   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
218 
219   /* invent new B and copy stuff over */
220   PetscCall(PetscMalloc1(mbs, &nz));
221   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
222   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
223   PetscCall(MatSetSizes(Bnew, m, n, m, n));
224   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
225   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
226   PetscCall(PetscFree(nz));
227 
228   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
229     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
230   }
231 
232   /*
233    Ensure that B's nonzerostate is monotonically increasing.
234    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
235    */
236   Bnew->nonzerostate = B->nonzerostate;
237   PetscCall(PetscMalloc1(bs, &rvals));
238   for (i = 0; i < mbs; i++) {
239     rvals[0] = bs * i;
240     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
241     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
242       col = garray[Bbaij->j[j]] * bs;
243       for (k = 0; k < bs; k++) {
244 #if defined(PETSC_USE_REAL_MAT_SINGLE)
245         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
246 #else
247         atmp = a + j * bs2 + k * bs;
248 #endif
249         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
250         col++;
251       }
252     }
253   }
254 #if defined(PETSC_USE_REAL_MAT_SINGLE)
255   PetscCall(PetscFree(atmp));
256 #endif
257   PetscCall(PetscFree(baij->garray));
258 
259   baij->garray = NULL;
260 
261   PetscCall(PetscFree(rvals));
262   PetscCall(MatDestroy(&B));
263 
264   baij->B = Bnew;
265 
266   A->was_assembled = PETSC_FALSE;
267   PetscFunctionReturn(0);
268 }
269