xref: /petsc/src/mat/impls/sell/mpi/mmsell.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1d4002b98SHong Zhang /*
2d4002b98SHong Zhang    Support for the parallel SELL matrix vector multiply
3d4002b98SHong Zhang */
4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h>
5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6d4002b98SHong Zhang 
7d4002b98SHong Zhang /*
8d4002b98SHong Zhang    Takes the local part of an already assembled MPISELL matrix
9d4002b98SHong Zhang    and disassembles it. This is to allow new nonzeros into the matrix
10d4002b98SHong Zhang    that require more communication in the matrix vector multiply.
11d4002b98SHong Zhang    Thus certain data-structures must be rebuilt.
12d4002b98SHong Zhang 
13d4002b98SHong Zhang    Kind of slow! But that's what application programmers get when
14d4002b98SHong Zhang    they are sloppy.
15d4002b98SHong Zhang */
16*9371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPISELL(Mat A) {
17d4002b98SHong Zhang   Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
18d4002b98SHong Zhang   Mat          B     = sell->B, Bnew;
19d4002b98SHong Zhang   Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
20d4002b98SHong Zhang   PetscInt     i, j, totalslices, N = A->cmap->N, ec, row;
21d4002b98SHong Zhang   PetscBool    isnonzero;
22d4002b98SHong Zhang 
23d4002b98SHong Zhang   PetscFunctionBegin;
24d4002b98SHong Zhang   /* free stuff related to matrix-vec multiply */
259566063dSJacob Faibussowitsch   PetscCall(VecGetSize(sell->lvec, &ec)); /* needed for PetscLogObjectMemory below */
269566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&sell->lvec));
279566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sell->Mvctx));
28d4002b98SHong Zhang   if (sell->colmap) {
29d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
309566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&sell->colmap));
31d4002b98SHong Zhang #else
329566063dSJacob Faibussowitsch     PetscCall(PetscFree(sell->colmap));
339566063dSJacob Faibussowitsch     PetscCall(PetscLogObjectMemory((PetscObject)A, -sell->B->cmap->n * sizeof(PetscInt)));
34d4002b98SHong Zhang #endif
35d4002b98SHong Zhang   }
36d4002b98SHong Zhang 
37d4002b98SHong Zhang   /* make sure that B is assembled so we can access its values */
389566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
40d4002b98SHong Zhang 
41d4002b98SHong Zhang   /* invent new B and copy stuff over */
429566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
439566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
449566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
459566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
469566063dSJacob Faibussowitsch   PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
47b38c15b3SStefano Zampini   if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
48b38c15b3SStefano Zampini     ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
49b38c15b3SStefano Zampini   }
50d4002b98SHong Zhang 
51d4002b98SHong Zhang   /*
52d4002b98SHong Zhang    Ensure that B's nonzerostate is monotonically increasing.
53d4002b98SHong Zhang    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
54d4002b98SHong Zhang    */
55d4002b98SHong Zhang   Bnew->nonzerostate = B->nonzerostate;
56d4002b98SHong Zhang 
57d4002b98SHong Zhang   totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
58d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) {                           /* loop over slices */
59d4002b98SHong Zhang     for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
60d4002b98SHong Zhang       isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]);
61*9371c9d4SSatish Balay       if (isnonzero) { PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); }
62d4002b98SHong Zhang     }
63d4002b98SHong Zhang   }
64d4002b98SHong Zhang 
659566063dSJacob Faibussowitsch   PetscCall(PetscFree(sell->garray));
669566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
679566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
689566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));
69d4002b98SHong Zhang 
70d4002b98SHong Zhang   sell->B          = Bnew;
71d4002b98SHong Zhang   A->was_assembled = PETSC_FALSE;
72d4002b98SHong Zhang   A->assembled     = PETSC_FALSE;
73d4002b98SHong Zhang   PetscFunctionReturn(0);
74d4002b98SHong Zhang }
75d4002b98SHong Zhang 
76*9371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) {
77d4002b98SHong Zhang   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
78d4002b98SHong Zhang   Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
79d4002b98SHong Zhang   PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
80d4002b98SHong Zhang   IS           from, to;
81d4002b98SHong Zhang   Vec          gvec;
82d4002b98SHong Zhang   PetscBool    isnonzero;
83d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
84d4002b98SHong Zhang   PetscTable         gid1_lid1;
85d4002b98SHong Zhang   PetscTablePosition tpos;
86d4002b98SHong Zhang   PetscInt           gid, lid;
87d4002b98SHong Zhang #else
88d4002b98SHong Zhang   PetscInt N = mat->cmap->N, *indices;
89d4002b98SHong Zhang #endif
90d4002b98SHong Zhang 
91d4002b98SHong Zhang   PetscFunctionBegin;
92d4002b98SHong Zhang   totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
93d4002b98SHong Zhang 
94d4002b98SHong Zhang   /* ec counts the number of columns that contain nonzeros */
95d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
96d4002b98SHong Zhang   /* use a table */
979566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
98d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
99d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
100d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
101d4002b98SHong Zhang       if (isnonzero) { /* check the mask bit */
102d4002b98SHong Zhang         PetscInt data, gid1 = bcolidx[j] + 1;
1039566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
104d4002b98SHong Zhang         if (!data) {
105d4002b98SHong Zhang           /* one based table */
1069566063dSJacob Faibussowitsch           PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
107d4002b98SHong Zhang         }
108d4002b98SHong Zhang       }
109d4002b98SHong Zhang     }
110d4002b98SHong Zhang   }
111d4002b98SHong Zhang 
112d4002b98SHong Zhang   /* form array of columns we need */
1139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
1149566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
115d4002b98SHong Zhang   while (tpos) {
1169566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
117d4002b98SHong Zhang     gid--;
118d4002b98SHong Zhang     lid--;
119d4002b98SHong Zhang     garray[lid] = gid;
120d4002b98SHong Zhang   }
1219566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
1229566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
123*9371c9d4SSatish Balay   for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); }
124d4002b98SHong Zhang 
125d4002b98SHong Zhang   /* compact out the extra columns in B */
126d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
127d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
128d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
129d4002b98SHong Zhang       if (isnonzero) {
130d4002b98SHong Zhang         PetscInt gid1 = bcolidx[j] + 1;
1319566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
132d4002b98SHong Zhang         lid--;
133d4002b98SHong Zhang         bcolidx[j] = lid;
134d4002b98SHong Zhang       }
135d4002b98SHong Zhang     }
136d4002b98SHong Zhang   }
1379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1389566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1399566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
140d4002b98SHong Zhang #else
141d4002b98SHong Zhang   /* Make an array as long as the number of columns */
1429566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(N, &indices));
143d4002b98SHong Zhang   /* mark those columns that are in sell->B */
144d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
145d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
146d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
147d4002b98SHong Zhang       if (isnonzero) {
148d4002b98SHong Zhang         if (!indices[bcolidx[j]]) ec++;
149d4002b98SHong Zhang         indices[bcolidx[j]] = 1;
150d4002b98SHong Zhang       }
151d4002b98SHong Zhang     }
152d4002b98SHong Zhang   }
153d4002b98SHong Zhang 
154d4002b98SHong Zhang   /* form array of columns we need */
1559566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
156d4002b98SHong Zhang   ec = 0;
157d4002b98SHong Zhang   for (i = 0; i < N; i++) {
158d4002b98SHong Zhang     if (indices[i]) garray[ec++] = i;
159d4002b98SHong Zhang   }
160d4002b98SHong Zhang 
161d4002b98SHong Zhang   /* make indices now point into garray */
162*9371c9d4SSatish Balay   for (i = 0; i < ec; i++) { indices[garray[i]] = i; }
163d4002b98SHong Zhang 
164d4002b98SHong Zhang   /* compact out the extra columns in B */
165d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
166d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
167d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
168d4002b98SHong Zhang       if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
169d4002b98SHong Zhang     }
170d4002b98SHong Zhang   }
1719566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1729566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1739566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
174d4002b98SHong Zhang #endif
175d4002b98SHong Zhang   /* create local vector that is used to scatter into */
1769566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
177d4002b98SHong Zhang   /* create two temporary Index sets for build scatter gather */
1789566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
1799566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));
180d4002b98SHong Zhang 
181d4002b98SHong Zhang   /* create temporary global vector to generate scatter context */
182d4002b98SHong Zhang   /* This does not allocate the array's memory so is efficient */
1839566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
184d4002b98SHong Zhang 
185d4002b98SHong Zhang   /* generate the scatter context */
1869566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
1879566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
1889566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->Mvctx));
1899566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->lvec));
1909566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
1919566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));
192d4002b98SHong Zhang 
193d4002b98SHong Zhang   sell->garray = garray;
194d4002b98SHong Zhang 
1959566063dSJacob Faibussowitsch   PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
1969566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1979566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1989566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
199d4002b98SHong Zhang   PetscFunctionReturn(0);
200d4002b98SHong Zhang }
201d4002b98SHong Zhang 
202d4002b98SHong Zhang /*      ugly stuff added for Glenn someday we should fix this up */
203f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
204f4259b30SLisandro Dalcin static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
205d4002b98SHong Zhang 
206*9371c9d4SSatish Balay PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
207d4002b98SHong Zhang   Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */
208d4002b98SHong Zhang   PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
209d4002b98SHong Zhang   PetscInt    *r_rmapd, *r_rmapo;
210d4002b98SHong Zhang 
211d4002b98SHong Zhang   PetscFunctionBegin;
2129566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2139566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2149566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
215d4002b98SHong Zhang   nt = 0;
216d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
217d4002b98SHong Zhang     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
218d4002b98SHong Zhang       nt++;
219d4002b98SHong Zhang       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
220d4002b98SHong Zhang     }
221d4002b98SHong Zhang   }
22208401ef6SPierre Jolivet   PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
2239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
224d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
225*9371c9d4SSatish Balay     if (r_rmapd[i]) { auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; }
226d4002b98SHong Zhang   }
2279566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2289566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
2299566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
230*9371c9d4SSatish Balay   for (i = 0; i < ina->B->cmap->n; i++) { lindices[garray[i]] = i + 1; }
231d4002b98SHong Zhang   no = inA->rmap->mapping->n - nt;
2329566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
233d4002b98SHong Zhang   nt = 0;
234d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
235d4002b98SHong Zhang     if (lindices[inA->rmap->mapping->indices[i]]) {
236d4002b98SHong Zhang       nt++;
237d4002b98SHong Zhang       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
238d4002b98SHong Zhang     }
239d4002b98SHong Zhang   }
24008401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2419566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
243d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
244*9371c9d4SSatish Balay     if (r_rmapo[i]) { auglyrmapo[(r_rmapo[i] - 1)] = i; }
245d4002b98SHong Zhang   }
2469566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2479566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
248d4002b98SHong Zhang   PetscFunctionReturn(0);
249d4002b98SHong Zhang }
250d4002b98SHong Zhang 
251*9371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) {
252d4002b98SHong Zhang   Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /*access private part of matrix */
253d4002b98SHong Zhang   PetscInt           n, i;
254d4002b98SHong Zhang   PetscScalar       *d, *o;
255d4002b98SHong Zhang   const PetscScalar *s;
256d4002b98SHong Zhang 
257d4002b98SHong Zhang   PetscFunctionBegin;
258*9371c9d4SSatish Balay   if (!auglyrmapd) { PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); }
2599566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
2609566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglydd, &n));
2619566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglydd, &d));
262*9371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglydd, &d));
264d4002b98SHong Zhang   /* column scale "diagonal" portion of local matrix */
2659566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
2669566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglyoo, &n));
2679566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglyoo, &o));
268*9371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglyoo, &o));
271d4002b98SHong Zhang   /* column scale "off-diagonal" portion of local matrix */
2729566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
273d4002b98SHong Zhang   PetscFunctionReturn(0);
274d4002b98SHong Zhang }
275