xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1a30f8f8cSSatish Balay 
2a30f8f8cSSatish Balay /*
3a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
4a30f8f8cSSatish Balay */
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
67cff1425SSatish Balay 
79371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) {
840781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
940781036SHong Zhang   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
10a11b0bdaSJunchao Zhang   PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
11d0f46423SBarry Smith   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
1240781036SHong Zhang   IS              from, to;
1340781036SHong Zhang   Vec             gvec;
14a11b0bdaSJunchao Zhang   PetscMPIInt     rank   = sbaij->rank, lsize;
15077aedafSJed Brown   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
16077aedafSJed Brown   const PetscInt *sowners;
1740781036SHong Zhang   PetscScalar    *ptr;
18a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
19a11b0bdaSJunchao Zhang   PetscTable         gid1_lid1; /* one-based gid to lid table */
20a11b0bdaSJunchao Zhang   PetscTablePosition tpos;
21a11b0bdaSJunchao Zhang   PetscInt           gid, lid;
22a11b0bdaSJunchao Zhang #else
23a11b0bdaSJunchao Zhang   PetscInt *indices;
24a11b0bdaSJunchao Zhang #endif
2540781036SHong Zhang 
2640781036SHong Zhang   PetscFunctionBegin;
27a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
289566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
29a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
30a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
31a11b0bdaSJunchao Zhang       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
329566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
339566063dSJacob Faibussowitsch       if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
34a11b0bdaSJunchao Zhang     }
35a11b0bdaSJunchao Zhang   }
36a11b0bdaSJunchao Zhang   /* form array of columns we need */
379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
389566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
39a11b0bdaSJunchao Zhang   while (tpos) {
409566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
419371c9d4SSatish Balay     gid--;
429371c9d4SSatish Balay     lid--;
43a11b0bdaSJunchao Zhang     garray[lid] = gid;
44a11b0bdaSJunchao Zhang   }
459566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
469566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
479566063dSJacob Faibussowitsch   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
48a11b0bdaSJunchao Zhang   /* compact out the extra columns in B */
49a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
50a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
51a11b0bdaSJunchao Zhang       PetscInt gid1 = aj[B->i[i] + j] + 1;
529566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
53a11b0bdaSJunchao Zhang       lid--;
54a11b0bdaSJunchao Zhang       aj[B->i[i] + j] = lid;
55a11b0bdaSJunchao Zhang     }
56a11b0bdaSJunchao Zhang   }
579566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
59a11b0bdaSJunchao Zhang   for (i = j = 0; i < ec; i++) {
60a11b0bdaSJunchao Zhang     while (garray[i] >= owners[j + 1]) j++;
61a11b0bdaSJunchao Zhang     ec_owner[i] = j;
62a11b0bdaSJunchao Zhang   }
63a11b0bdaSJunchao Zhang #else
6440781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
6540781036SHong Zhang   /* mark those columns that are in sbaij->B */
669566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
6740781036SHong Zhang   for (i = 0; i < mbs; i++) {
6840781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) {
6940781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
7040781036SHong Zhang       indices[aj[B->i[i] + j]] = 1;
7140781036SHong Zhang     }
7240781036SHong Zhang   }
7340781036SHong Zhang 
7440781036SHong Zhang   /* form arrays of columns we need */
759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
7740781036SHong Zhang 
7840781036SHong Zhang   ec = 0;
79a11b0bdaSJunchao Zhang   for (j = 0; j < sbaij->size; j++) {
8040781036SHong Zhang     for (i = owners[j]; i < owners[j + 1]; i++) {
8140781036SHong Zhang       if (indices[i]) {
8240781036SHong Zhang         garray[ec]   = i;
8340781036SHong Zhang         ec_owner[ec] = j;
8440781036SHong Zhang         ec++;
8540781036SHong Zhang       }
8640781036SHong Zhang     }
8740781036SHong Zhang   }
8840781036SHong Zhang 
8940781036SHong Zhang   /* make indices now point into garray */
90b424e231SHong Zhang   for (i = 0; i < ec; i++) indices[garray[i]] = i;
9140781036SHong Zhang 
9240781036SHong Zhang   /* compact out the extra columns in B */
9340781036SHong Zhang   for (i = 0; i < mbs; i++) {
9440781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
9540781036SHong Zhang   }
969566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
97a11b0bdaSJunchao Zhang #endif
9840781036SHong Zhang   B->nbs = ec;
999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
1009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
10140781036SHong Zhang 
1029566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sbaij->sMvctx));
10340781036SHong Zhang   /* create local vector that is used to scatter into */
1049566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
10540781036SHong Zhang 
10640781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
1079566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * ec, &stmp));
1089566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
10926fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1109566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
11140781036SHong Zhang 
11293d1592dSHong Zhang   /* generate the scatter context
113bd096759SJunchao Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
1149566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1159566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
1169566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
11740781036SHong Zhang 
11840781036SHong Zhang   sbaij->garray = garray;
11926fbe8dcSKarl Rupp 
1209566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1219566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
12240781036SHong Zhang 
12340781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
12440781036SHong Zhang   lsize = (mbs + ec) * bs;
1259566063dSJacob Faibussowitsch   PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
1269566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
1279566063dSJacob Faibussowitsch   PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
12840781036SHong Zhang 
1299566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
13040781036SHong Zhang 
13140781036SHong Zhang   /* x index in the IS sfrom */
13240781036SHong Zhang   for (i = 0; i < ec; i++) {
13340781036SHong Zhang     j          = ec_owner[i];
13440781036SHong Zhang     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
13540781036SHong Zhang   }
13640781036SHong Zhang   /* b index in the IS sfrom */
13740781036SHong Zhang   k = sowners[rank] / bs + mbs;
13840781036SHong Zhang   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
1399566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
14040781036SHong Zhang 
14140781036SHong Zhang   /* x index in the IS sto */
14240781036SHong Zhang   k = sowners[rank] / bs + mbs;
143e82e9f6bSBarry Smith   for (i = 0; i < ec; i++) stmp[i] = (k + i);
14440781036SHong Zhang   /* b index in the IS sto */
145e82e9f6bSBarry Smith   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
14640781036SHong Zhang 
1479566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
14840781036SHong Zhang 
1499566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
1509566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
15140781036SHong Zhang 
1529566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
1539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec1, &ptr));
1549566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
1559566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
1569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
15740781036SHong Zhang 
1589566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec0, &ptr));
1599566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
1609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
16140781036SHong Zhang 
1629566063dSJacob Faibussowitsch   PetscCall(PetscFree(stmp));
16340781036SHong Zhang 
1649566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1659566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sgarray, ec_owner));
16740781036SHong Zhang   PetscFunctionReturn(0);
16840781036SHong Zhang }
16940781036SHong Zhang 
170a30f8f8cSSatish Balay /*
17101b2bd88SHong Zhang      Takes the local part of an already assembled MPISBAIJ matrix
172a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
173a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
174a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
175a30f8f8cSSatish Balay 
176a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
177a30f8f8cSSatish Balay    they are sloppy.
178a30f8f8cSSatish Balay */
1799371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) {
1802896befeSSatish Balay   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
181a30f8f8cSSatish Balay   Mat           B     = baij->B, Bnew;
182a30f8f8cSSatish Balay   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
183d0f46423SBarry Smith   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
184*4dfa11a4SJacob Faibussowitsch   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
185a30f8f8cSSatish Balay   MatScalar    *a = Bbaij->a;
18687828ca2SBarry Smith   PetscScalar  *atmp;
187ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
18813f74950SBarry Smith   PetscInt l;
189a30f8f8cSSatish Balay #endif
190a30f8f8cSSatish Balay 
191a30f8f8cSSatish Balay   PetscFunctionBegin;
192ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
1939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
19482502324SSatish Balay #endif
195a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
1969566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
1979566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
19801b2bd88SHong Zhang 
1999566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
2009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
2019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
2029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
2039566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
20401b2bd88SHong Zhang 
205a30f8f8cSSatish Balay   if (baij->colmap) {
206a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2079566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&baij->colmap));
208a30f8f8cSSatish Balay #else
2099566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
210a30f8f8cSSatish Balay #endif
211a30f8f8cSSatish Balay   }
212a30f8f8cSSatish Balay 
213a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
2149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
216a30f8f8cSSatish Balay 
217a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
2189566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &nz));
219ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
2209566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
2219566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, m, n, m, n));
2229566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
2239566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
2249566063dSJacob Faibussowitsch   PetscCall(PetscFree(nz));
225a30f8f8cSSatish Balay 
226b38c15b3SStefano Zampini   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
227b38c15b3SStefano Zampini     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
228b38c15b3SStefano Zampini   }
229b38c15b3SStefano Zampini 
23077341eacSDmitry Karpeev   /*
23177341eacSDmitry Karpeev    Ensure that B's nonzerostate is monotonically increasing.
23277341eacSDmitry Karpeev    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
23377341eacSDmitry Karpeev    */
23477341eacSDmitry Karpeev   Bnew->nonzerostate = B->nonzerostate;
2359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &rvals));
236a30f8f8cSSatish Balay   for (i = 0; i < mbs; i++) {
237a30f8f8cSSatish Balay     rvals[0] = bs * i;
23826fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
239a30f8f8cSSatish Balay     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
240a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]] * bs;
241a30f8f8cSSatish Balay       for (k = 0; k < bs; k++) {
242ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
243a30f8f8cSSatish Balay         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
244a30f8f8cSSatish Balay #else
24571730473SSatish Balay         atmp = a + j * bs2 + k * bs;
246a30f8f8cSSatish Balay #endif
2479566063dSJacob Faibussowitsch         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
248a30f8f8cSSatish Balay         col++;
249a30f8f8cSSatish Balay       }
250a30f8f8cSSatish Balay     }
251a30f8f8cSSatish Balay   }
252ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
2539566063dSJacob Faibussowitsch   PetscCall(PetscFree(atmp));
254a30f8f8cSSatish Balay #endif
2559566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
25626fbe8dcSKarl Rupp 
257f4259b30SLisandro Dalcin   baij->garray = NULL;
25826fbe8dcSKarl Rupp 
2599566063dSJacob Faibussowitsch   PetscCall(PetscFree(rvals));
2609566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
26126fbe8dcSKarl Rupp 
262a30f8f8cSSatish Balay   baij->B = Bnew;
26326fbe8dcSKarl Rupp 
264a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
265a30f8f8cSSatish Balay   PetscFunctionReturn(0);
266a30f8f8cSSatish Balay }
267