xref: /petsc/src/mat/impls/sell/mpi/mmsell.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1d4002b98SHong Zhang /*
2d4002b98SHong Zhang    Support for the parallel SELL matrix vector multiply
3d4002b98SHong Zhang */
4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h>
5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6d4002b98SHong Zhang 
7d4002b98SHong Zhang /*
8d4002b98SHong Zhang    Takes the local part of an already assembled MPISELL matrix
9d4002b98SHong Zhang    and disassembles it. This is to allow new nonzeros into the matrix
10d4002b98SHong Zhang    that require more communication in the matrix vector multiply.
11d4002b98SHong Zhang    Thus certain data-structures must be rebuilt.
12d4002b98SHong Zhang 
13d4002b98SHong Zhang    Kind of slow! But that's what application programmers get when
14d4002b98SHong Zhang    they are sloppy.
15d4002b98SHong Zhang */
16*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISELL(Mat A)
17*d71ae5a4SJacob Faibussowitsch {
18d4002b98SHong Zhang   Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
19d4002b98SHong Zhang   Mat          B     = sell->B, Bnew;
20d4002b98SHong Zhang   Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
214dfa11a4SJacob Faibussowitsch   PetscInt     i, j, totalslices, N = A->cmap->N, row;
22d4002b98SHong Zhang   PetscBool    isnonzero;
23d4002b98SHong Zhang 
24d4002b98SHong Zhang   PetscFunctionBegin;
25d4002b98SHong Zhang   /* free stuff related to matrix-vec multiply */
269566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&sell->lvec));
279566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sell->Mvctx));
28d4002b98SHong Zhang   if (sell->colmap) {
29d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
309566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&sell->colmap));
31d4002b98SHong Zhang #else
329566063dSJacob Faibussowitsch     PetscCall(PetscFree(sell->colmap));
33d4002b98SHong Zhang #endif
34d4002b98SHong Zhang   }
35d4002b98SHong Zhang 
36d4002b98SHong Zhang   /* make sure that B is assembled so we can access its values */
379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
389566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
39d4002b98SHong Zhang 
40d4002b98SHong Zhang   /* invent new B and copy stuff over */
419566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
429566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
439566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
449566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
459566063dSJacob Faibussowitsch   PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
46b38c15b3SStefano Zampini   if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
47b38c15b3SStefano Zampini     ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
48b38c15b3SStefano Zampini   }
49d4002b98SHong Zhang 
50d4002b98SHong Zhang   /*
51d4002b98SHong Zhang    Ensure that B's nonzerostate is monotonically increasing.
52d4002b98SHong Zhang    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
53d4002b98SHong Zhang    */
54d4002b98SHong Zhang   Bnew->nonzerostate = B->nonzerostate;
55d4002b98SHong Zhang 
56d4002b98SHong Zhang   totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
57d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) {                           /* loop over slices */
58d4002b98SHong Zhang     for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
59d4002b98SHong Zhang       isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]);
6048a46eb9SPierre Jolivet       if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode));
61d4002b98SHong Zhang     }
62d4002b98SHong Zhang   }
63d4002b98SHong Zhang 
649566063dSJacob Faibussowitsch   PetscCall(PetscFree(sell->garray));
659566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
66d4002b98SHong Zhang 
67d4002b98SHong Zhang   sell->B          = Bnew;
68d4002b98SHong Zhang   A->was_assembled = PETSC_FALSE;
69d4002b98SHong Zhang   A->assembled     = PETSC_FALSE;
70d4002b98SHong Zhang   PetscFunctionReturn(0);
71d4002b98SHong Zhang }
72d4002b98SHong Zhang 
73*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat)
74*d71ae5a4SJacob Faibussowitsch {
75d4002b98SHong Zhang   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
76d4002b98SHong Zhang   Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
77d4002b98SHong Zhang   PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
78d4002b98SHong Zhang   IS           from, to;
79d4002b98SHong Zhang   Vec          gvec;
80d4002b98SHong Zhang   PetscBool    isnonzero;
81d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
82d4002b98SHong Zhang   PetscTable         gid1_lid1;
83d4002b98SHong Zhang   PetscTablePosition tpos;
84d4002b98SHong Zhang   PetscInt           gid, lid;
85d4002b98SHong Zhang #else
86d4002b98SHong Zhang   PetscInt N = mat->cmap->N, *indices;
87d4002b98SHong Zhang #endif
88d4002b98SHong Zhang 
89d4002b98SHong Zhang   PetscFunctionBegin;
90d4002b98SHong Zhang   totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
91d4002b98SHong Zhang 
92d4002b98SHong Zhang   /* ec counts the number of columns that contain nonzeros */
93d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
94d4002b98SHong Zhang   /* use a table */
959566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
96d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
97d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
98d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
99d4002b98SHong Zhang       if (isnonzero) { /* check the mask bit */
100d4002b98SHong Zhang         PetscInt data, gid1 = bcolidx[j] + 1;
1019566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
102d4002b98SHong Zhang         if (!data) {
103d4002b98SHong Zhang           /* one based table */
1049566063dSJacob Faibussowitsch           PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
105d4002b98SHong Zhang         }
106d4002b98SHong Zhang       }
107d4002b98SHong Zhang     }
108d4002b98SHong Zhang   }
109d4002b98SHong Zhang 
110d4002b98SHong Zhang   /* form array of columns we need */
1119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
1129566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
113d4002b98SHong Zhang   while (tpos) {
1149566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
115d4002b98SHong Zhang     gid--;
116d4002b98SHong Zhang     lid--;
117d4002b98SHong Zhang     garray[lid] = gid;
118d4002b98SHong Zhang   }
1199566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
1209566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
12148a46eb9SPierre Jolivet   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
122d4002b98SHong Zhang 
123d4002b98SHong Zhang   /* compact out the extra columns in B */
124d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
125d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
126d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
127d4002b98SHong Zhang       if (isnonzero) {
128d4002b98SHong Zhang         PetscInt gid1 = bcolidx[j] + 1;
1299566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
130d4002b98SHong Zhang         lid--;
131d4002b98SHong Zhang         bcolidx[j] = lid;
132d4002b98SHong Zhang       }
133d4002b98SHong Zhang     }
134d4002b98SHong Zhang   }
1359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1369566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1379566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
138d4002b98SHong Zhang #else
139d4002b98SHong Zhang   /* Make an array as long as the number of columns */
1409566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(N, &indices));
141d4002b98SHong Zhang   /* mark those columns that are in sell->B */
142d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
143d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
144d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
145d4002b98SHong Zhang       if (isnonzero) {
146d4002b98SHong Zhang         if (!indices[bcolidx[j]]) ec++;
147d4002b98SHong Zhang         indices[bcolidx[j]] = 1;
148d4002b98SHong Zhang       }
149d4002b98SHong Zhang     }
150d4002b98SHong Zhang   }
151d4002b98SHong Zhang 
152d4002b98SHong Zhang   /* form array of columns we need */
1539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
154d4002b98SHong Zhang   ec = 0;
155d4002b98SHong Zhang   for (i = 0; i < N; i++) {
156d4002b98SHong Zhang     if (indices[i]) garray[ec++] = i;
157d4002b98SHong Zhang   }
158d4002b98SHong Zhang 
159d4002b98SHong Zhang   /* make indices now point into garray */
160ad540459SPierre Jolivet   for (i = 0; i < ec; i++) indices[garray[i]] = i;
161d4002b98SHong Zhang 
162d4002b98SHong Zhang   /* compact out the extra columns in B */
163d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
164d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
165d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
166d4002b98SHong Zhang       if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
167d4002b98SHong Zhang     }
168d4002b98SHong Zhang   }
1699566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1709566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1719566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
172d4002b98SHong Zhang #endif
173d4002b98SHong Zhang   /* create local vector that is used to scatter into */
1749566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
175d4002b98SHong Zhang   /* create two temporary Index sets for build scatter gather */
1769566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
1779566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));
178d4002b98SHong Zhang 
179d4002b98SHong Zhang   /* create temporary global vector to generate scatter context */
180d4002b98SHong Zhang   /* This does not allocate the array's memory so is efficient */
1819566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
182d4002b98SHong Zhang 
183d4002b98SHong Zhang   /* generate the scatter context */
1849566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
1859566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
186d4002b98SHong Zhang 
187d4002b98SHong Zhang   sell->garray = garray;
188d4002b98SHong Zhang 
1899566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1909566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1919566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
192d4002b98SHong Zhang   PetscFunctionReturn(0);
193d4002b98SHong Zhang }
194d4002b98SHong Zhang 
195d4002b98SHong Zhang /*      ugly stuff added for Glenn someday we should fix this up */
196f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
197f4259b30SLisandro Dalcin static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
198d4002b98SHong Zhang 
199*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale)
200*d71ae5a4SJacob Faibussowitsch {
201d4002b98SHong Zhang   Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */
202d4002b98SHong Zhang   PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
203d4002b98SHong Zhang   PetscInt    *r_rmapd, *r_rmapo;
204d4002b98SHong Zhang 
205d4002b98SHong Zhang   PetscFunctionBegin;
2069566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2079566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2089566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
209d4002b98SHong Zhang   nt = 0;
210d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
211d4002b98SHong Zhang     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
212d4002b98SHong Zhang       nt++;
213d4002b98SHong Zhang       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
214d4002b98SHong Zhang     }
215d4002b98SHong Zhang   }
21608401ef6SPierre Jolivet   PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
2179566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
218d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
219ad540459SPierre Jolivet     if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
220d4002b98SHong Zhang   }
2219566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2229566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
2239566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
224ad540459SPierre Jolivet   for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
225d4002b98SHong Zhang   no = inA->rmap->mapping->n - nt;
2269566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
227d4002b98SHong Zhang   nt = 0;
228d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
229d4002b98SHong Zhang     if (lindices[inA->rmap->mapping->indices[i]]) {
230d4002b98SHong Zhang       nt++;
231d4002b98SHong Zhang       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
232d4002b98SHong Zhang     }
233d4002b98SHong Zhang   }
23408401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2359566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
237d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
238ad540459SPierre Jolivet     if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
239d4002b98SHong Zhang   }
2409566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2419566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
242d4002b98SHong Zhang   PetscFunctionReturn(0);
243d4002b98SHong Zhang }
244d4002b98SHong Zhang 
245*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale)
246*d71ae5a4SJacob Faibussowitsch {
247d4002b98SHong Zhang   Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /*access private part of matrix */
248d4002b98SHong Zhang   PetscInt           n, i;
249d4002b98SHong Zhang   PetscScalar       *d, *o;
250d4002b98SHong Zhang   const PetscScalar *s;
251d4002b98SHong Zhang 
252d4002b98SHong Zhang   PetscFunctionBegin;
25348a46eb9SPierre Jolivet   if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale));
2549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
2559566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglydd, &n));
2569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglydd, &d));
2579371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglydd, &d));
259d4002b98SHong Zhang   /* column scale "diagonal" portion of local matrix */
2609566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
2619566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglyoo, &n));
2629566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglyoo, &o));
2639371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2659566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglyoo, &o));
266d4002b98SHong Zhang   /* column scale "off-diagonal" portion of local matrix */
2679566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
268d4002b98SHong Zhang   PetscFunctionReturn(0);
269d4002b98SHong Zhang }
270