xref: /petsc/src/mat/impls/sell/mpi/mmsell.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1d4002b98SHong Zhang /*
2d4002b98SHong Zhang    Support for the parallel SELL matrix vector multiply
3d4002b98SHong Zhang */
4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h>
5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
6d4002b98SHong Zhang 
7d4002b98SHong Zhang /*
8d4002b98SHong Zhang    Takes the local part of an already assembled MPISELL matrix
9d4002b98SHong Zhang    and disassembles it. This is to allow new nonzeros into the matrix
10d4002b98SHong Zhang    that require more communication in the matrix vector multiply.
11d4002b98SHong Zhang    Thus certain data-structures must be rebuilt.
12d4002b98SHong Zhang 
13d4002b98SHong Zhang    Kind of slow! But that's what application programmers get when
14d4002b98SHong Zhang    they are sloppy.
15d4002b98SHong Zhang */
169371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPISELL(Mat A) {
17d4002b98SHong Zhang   Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
18d4002b98SHong Zhang   Mat          B     = sell->B, Bnew;
19d4002b98SHong Zhang   Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
20*4dfa11a4SJacob Faibussowitsch   PetscInt     i, j, totalslices, N = A->cmap->N, row;
21d4002b98SHong Zhang   PetscBool    isnonzero;
22d4002b98SHong Zhang 
23d4002b98SHong Zhang   PetscFunctionBegin;
24d4002b98SHong Zhang   /* free stuff related to matrix-vec multiply */
259566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&sell->lvec));
269566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sell->Mvctx));
27d4002b98SHong Zhang   if (sell->colmap) {
28d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
299566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&sell->colmap));
30d4002b98SHong Zhang #else
319566063dSJacob Faibussowitsch     PetscCall(PetscFree(sell->colmap));
32d4002b98SHong Zhang #endif
33d4002b98SHong Zhang   }
34d4002b98SHong Zhang 
35d4002b98SHong Zhang   /* make sure that B is assembled so we can access its values */
369566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
379566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
38d4002b98SHong Zhang 
39d4002b98SHong Zhang   /* invent new B and copy stuff over */
409566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
419566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
429566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
439566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
449566063dSJacob Faibussowitsch   PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
45b38c15b3SStefano Zampini   if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
46b38c15b3SStefano Zampini     ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
47b38c15b3SStefano Zampini   }
48d4002b98SHong Zhang 
49d4002b98SHong Zhang   /*
50d4002b98SHong Zhang    Ensure that B's nonzerostate is monotonically increasing.
51d4002b98SHong Zhang    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
52d4002b98SHong Zhang    */
53d4002b98SHong Zhang   Bnew->nonzerostate = B->nonzerostate;
54d4002b98SHong Zhang 
55d4002b98SHong Zhang   totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
56d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) {                           /* loop over slices */
57d4002b98SHong Zhang     for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
58d4002b98SHong Zhang       isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]);
5948a46eb9SPierre Jolivet       if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode));
60d4002b98SHong Zhang     }
61d4002b98SHong Zhang   }
62d4002b98SHong Zhang 
639566063dSJacob Faibussowitsch   PetscCall(PetscFree(sell->garray));
649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
65d4002b98SHong Zhang 
66d4002b98SHong Zhang   sell->B          = Bnew;
67d4002b98SHong Zhang   A->was_assembled = PETSC_FALSE;
68d4002b98SHong Zhang   A->assembled     = PETSC_FALSE;
69d4002b98SHong Zhang   PetscFunctionReturn(0);
70d4002b98SHong Zhang }
71d4002b98SHong Zhang 
729371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) {
73d4002b98SHong Zhang   Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
74d4002b98SHong Zhang   Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
75d4002b98SHong Zhang   PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
76d4002b98SHong Zhang   IS           from, to;
77d4002b98SHong Zhang   Vec          gvec;
78d4002b98SHong Zhang   PetscBool    isnonzero;
79d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
80d4002b98SHong Zhang   PetscTable         gid1_lid1;
81d4002b98SHong Zhang   PetscTablePosition tpos;
82d4002b98SHong Zhang   PetscInt           gid, lid;
83d4002b98SHong Zhang #else
84d4002b98SHong Zhang   PetscInt N = mat->cmap->N, *indices;
85d4002b98SHong Zhang #endif
86d4002b98SHong Zhang 
87d4002b98SHong Zhang   PetscFunctionBegin;
88d4002b98SHong Zhang   totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */
89d4002b98SHong Zhang 
90d4002b98SHong Zhang   /* ec counts the number of columns that contain nonzeros */
91d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE)
92d4002b98SHong Zhang   /* use a table */
939566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
94d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
95d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
96d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
97d4002b98SHong Zhang       if (isnonzero) { /* check the mask bit */
98d4002b98SHong Zhang         PetscInt data, gid1 = bcolidx[j] + 1;
999566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
100d4002b98SHong Zhang         if (!data) {
101d4002b98SHong Zhang           /* one based table */
1029566063dSJacob Faibussowitsch           PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
103d4002b98SHong Zhang         }
104d4002b98SHong Zhang       }
105d4002b98SHong Zhang     }
106d4002b98SHong Zhang   }
107d4002b98SHong Zhang 
108d4002b98SHong Zhang   /* form array of columns we need */
1099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
1109566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
111d4002b98SHong Zhang   while (tpos) {
1129566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
113d4002b98SHong Zhang     gid--;
114d4002b98SHong Zhang     lid--;
115d4002b98SHong Zhang     garray[lid] = gid;
116d4002b98SHong Zhang   }
1179566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
1189566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
11948a46eb9SPierre Jolivet   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
120d4002b98SHong Zhang 
121d4002b98SHong Zhang   /* compact out the extra columns in B */
122d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
123d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
124d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
125d4002b98SHong Zhang       if (isnonzero) {
126d4002b98SHong Zhang         PetscInt gid1 = bcolidx[j] + 1;
1279566063dSJacob Faibussowitsch         PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
128d4002b98SHong Zhang         lid--;
129d4002b98SHong Zhang         bcolidx[j] = lid;
130d4002b98SHong Zhang       }
131d4002b98SHong Zhang     }
132d4002b98SHong Zhang   }
1339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1359566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
136d4002b98SHong Zhang #else
137d4002b98SHong Zhang   /* Make an array as long as the number of columns */
1389566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(N, &indices));
139d4002b98SHong Zhang   /* mark those columns that are in sell->B */
140d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
141d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
142d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
143d4002b98SHong Zhang       if (isnonzero) {
144d4002b98SHong Zhang         if (!indices[bcolidx[j]]) ec++;
145d4002b98SHong Zhang         indices[bcolidx[j]] = 1;
146d4002b98SHong Zhang       }
147d4002b98SHong Zhang     }
148d4002b98SHong Zhang   }
149d4002b98SHong Zhang 
150d4002b98SHong Zhang   /* form array of columns we need */
1519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
152d4002b98SHong Zhang   ec = 0;
153d4002b98SHong Zhang   for (i = 0; i < N; i++) {
154d4002b98SHong Zhang     if (indices[i]) garray[ec++] = i;
155d4002b98SHong Zhang   }
156d4002b98SHong Zhang 
157d4002b98SHong Zhang   /* make indices now point into garray */
158ad540459SPierre Jolivet   for (i = 0; i < ec; i++) indices[garray[i]] = i;
159d4002b98SHong Zhang 
160d4002b98SHong Zhang   /* compact out the extra columns in B */
161d4002b98SHong Zhang   for (i = 0; i < totalslices; i++) { /* loop over slices */
162d4002b98SHong Zhang     for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
163d4002b98SHong Zhang       isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
164d4002b98SHong Zhang       if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
165d4002b98SHong Zhang     }
166d4002b98SHong Zhang   }
1679566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sell->B->cmap));
1689566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
1699566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
170d4002b98SHong Zhang #endif
171d4002b98SHong Zhang   /* create local vector that is used to scatter into */
1729566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
173d4002b98SHong Zhang   /* create two temporary Index sets for build scatter gather */
1749566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
1759566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));
176d4002b98SHong Zhang 
177d4002b98SHong Zhang   /* create temporary global vector to generate scatter context */
178d4002b98SHong Zhang   /* This does not allocate the array's memory so is efficient */
1799566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
180d4002b98SHong Zhang 
181d4002b98SHong Zhang   /* generate the scatter context */
1829566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
1839566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
184d4002b98SHong Zhang 
185d4002b98SHong Zhang   sell->garray = garray;
186d4002b98SHong Zhang 
1879566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1889566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1899566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
190d4002b98SHong Zhang   PetscFunctionReturn(0);
191d4002b98SHong Zhang }
192d4002b98SHong Zhang 
193d4002b98SHong Zhang /*      ugly stuff added for Glenn someday we should fix this up */
194f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
195f4259b30SLisandro Dalcin static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
196d4002b98SHong Zhang 
1979371c9d4SSatish Balay PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
198d4002b98SHong Zhang   Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */
199d4002b98SHong Zhang   PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
200d4002b98SHong Zhang   PetscInt    *r_rmapd, *r_rmapo;
201d4002b98SHong Zhang 
202d4002b98SHong Zhang   PetscFunctionBegin;
2039566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2049566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2059566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
206d4002b98SHong Zhang   nt = 0;
207d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
208d4002b98SHong Zhang     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
209d4002b98SHong Zhang       nt++;
210d4002b98SHong Zhang       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
211d4002b98SHong Zhang     }
212d4002b98SHong Zhang   }
21308401ef6SPierre Jolivet   PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
2149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
215d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
216ad540459SPierre Jolivet     if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
217d4002b98SHong Zhang   }
2189566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2199566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
2209566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
221ad540459SPierre Jolivet   for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
222d4002b98SHong Zhang   no = inA->rmap->mapping->n - nt;
2239566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
224d4002b98SHong Zhang   nt = 0;
225d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
226d4002b98SHong Zhang     if (lindices[inA->rmap->mapping->indices[i]]) {
227d4002b98SHong Zhang       nt++;
228d4002b98SHong Zhang       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
229d4002b98SHong Zhang     }
230d4002b98SHong Zhang   }
23108401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2329566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
234d4002b98SHong Zhang   for (i = 0; i < inA->rmap->mapping->n; i++) {
235ad540459SPierre Jolivet     if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
236d4002b98SHong Zhang   }
2379566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2389566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
239d4002b98SHong Zhang   PetscFunctionReturn(0);
240d4002b98SHong Zhang }
241d4002b98SHong Zhang 
2429371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) {
243d4002b98SHong Zhang   Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /*access private part of matrix */
244d4002b98SHong Zhang   PetscInt           n, i;
245d4002b98SHong Zhang   PetscScalar       *d, *o;
246d4002b98SHong Zhang   const PetscScalar *s;
247d4002b98SHong Zhang 
248d4002b98SHong Zhang   PetscFunctionBegin;
24948a46eb9SPierre Jolivet   if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale));
2509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
2519566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglydd, &n));
2529566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglydd, &d));
2539371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglydd, &d));
255d4002b98SHong Zhang   /* column scale "diagonal" portion of local matrix */
2569566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
2579566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglyoo, &n));
2589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglyoo, &o));
2599371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglyoo, &o));
262d4002b98SHong Zhang   /* column scale "off-diagonal" portion of local matrix */
2639566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
264d4002b98SHong Zhang   PetscFunctionReturn(0);
265d4002b98SHong Zhang }
266