xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision ba38deedd2838b0f18e3744cb0bafd6392690fde)
1be1d678aSKris Buschelman 
28016bdd1SSatish Balay /*
3d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
48016bdd1SSatish Balay */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>
6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7bba1ac68SSatish Balay 
8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
9d71ae5a4SJacob Faibussowitsch {
10d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
11d9653453SSatish Balay   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
12b24ad042SBarry Smith   PetscInt     i, j, *aj = B->j, ec = 0, *garray;
13d0f46423SBarry Smith   PetscInt     bs = mat->rmap->bs, *stmp;
148016bdd1SSatish Balay   IS           from, to;
158016bdd1SSatish Balay   Vec          gvec;
16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
17eec179cfSJacob Faibussowitsch   PetscHMapI    gid1_lid1 = NULL;
18eec179cfSJacob Faibussowitsch   PetscHashIter tpos;
19b24ad042SBarry Smith   PetscInt      gid, lid;
206f531f54SSatish Balay #else
21b24ad042SBarry Smith   PetscInt Nbs = baij->Nbs, *indices;
2273a2e727SSatish Balay #endif
238016bdd1SSatish Balay 
243a40ed3dSBarry Smith   PetscFunctionBegin;
25aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2673a2e727SSatish Balay   /* use a table - Mark Adams */
27eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
2873a2e727SSatish Balay   for (i = 0; i < B->mbs; i++) {
2973a2e727SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
30b24ad042SBarry Smith       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
31eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
32fa46199cSSatish Balay       if (!data) {
3373a2e727SSatish Balay         /* one based table */
34c76ffc5fSJacob Faibussowitsch         PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
3573a2e727SSatish Balay       }
3673a2e727SSatish Balay     }
3773a2e727SSatish Balay   }
3873a2e727SSatish Balay   /* form array of columns we need */
399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
40eec179cfSJacob Faibussowitsch   PetscHashIterBegin(gid1_lid1, tpos);
41eec179cfSJacob Faibussowitsch   while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
42eec179cfSJacob Faibussowitsch     PetscHashIterGetKey(gid1_lid1, tpos, gid);
43eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(gid1_lid1, tpos, lid);
44eec179cfSJacob Faibussowitsch     PetscHashIterNext(gid1_lid1, tpos);
459371c9d4SSatish Balay     gid--;
469371c9d4SSatish Balay     lid--;
4773a2e727SSatish Balay     garray[lid] = gid;
4873a2e727SSatish Balay   }
499566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
50eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIClear(gid1_lid1));
51c76ffc5fSJacob Faibussowitsch   for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
5273a2e727SSatish Balay   /* compact out the extra columns in B */
5373a2e727SSatish Balay   for (i = 0; i < B->mbs; i++) {
5473a2e727SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
55b24ad042SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
56eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
57fa46199cSSatish Balay       lid--;
5873a2e727SSatish Balay       aj[B->i[i] + j] = lid;
5973a2e727SSatish Balay     }
6073a2e727SSatish Balay   }
6173a2e727SSatish Balay   B->nbs = ec;
629566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
639566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
64eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&gid1_lid1));
6573a2e727SSatish Balay #else
66435da068SBarry Smith   /* Make an array as long as the number of columns */
67d9653453SSatish Balay   /* mark those columns that are in baij->B */
689566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
69d9653453SSatish Balay   for (i = 0; i < B->mbs; i++) {
708016bdd1SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
71d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
72d9653453SSatish Balay       indices[aj[B->i[i] + j]] = 1;
738016bdd1SSatish Balay     }
748016bdd1SSatish Balay   }
758016bdd1SSatish Balay 
768016bdd1SSatish Balay   /* form array of columns we need */
779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
788016bdd1SSatish Balay   ec = 0;
79d9653453SSatish Balay   for (i = 0; i < Nbs; i++) {
80ad540459SPierre Jolivet     if (indices[i]) garray[ec++] = i;
818016bdd1SSatish Balay   }
828016bdd1SSatish Balay 
838016bdd1SSatish Balay   /* make indices now point into garray */
84ad540459SPierre Jolivet   for (i = 0; i < ec; i++) indices[garray[i]] = i;
858016bdd1SSatish Balay 
868016bdd1SSatish Balay   /* compact out the extra columns in B */
87d9653453SSatish Balay   for (i = 0; i < B->mbs; i++) {
88ad540459SPierre Jolivet     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
898016bdd1SSatish Balay   }
90d9653453SSatish Balay   B->nbs = ec;
919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
939566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
9473a2e727SSatish Balay #endif
958016bdd1SSatish Balay 
968016bdd1SSatish Balay   /* create local vector that is used to scatter into */
979566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));
988016bdd1SSatish Balay 
99c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
1009566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
101c16cb8f2SBarry Smith 
1029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &stmp));
10326fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1049566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
1058016bdd1SSatish Balay 
1068016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1079566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1088016bdd1SSatish Balay 
1099566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
1109566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
11190f02eecSBarry Smith 
112d9653453SSatish Balay   baij->garray = garray;
11326fbe8dcSKarl Rupp 
1149566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1159566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1169566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
1173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1188016bdd1SSatish Balay }
1198016bdd1SSatish Balay 
1208016bdd1SSatish Balay /*
121d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1228016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1238016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1248016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1258016bdd1SSatish Balay 
1268016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1278016bdd1SSatish Balay    they are sloppy.
1288016bdd1SSatish Balay */
129d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
130d71ae5a4SJacob Faibussowitsch {
131d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
132d9653453SSatish Balay   Mat          B    = baij->B, Bnew;
1337c612885SStefano Zampini   Mat_SeqBAIJ *Bbaij;
1347c612885SStefano Zampini   PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
1357c612885SStefano Zampini   PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
1367c612885SStefano Zampini   MatScalar   *a, *atmp;
13797e5c40aSBarry Smith 
1383a40ed3dSBarry Smith   PetscFunctionBegin;
1398016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
1409371c9d4SSatish Balay   PetscCall(VecDestroy(&baij->lvec));
1419371c9d4SSatish Balay   PetscCall(VecScatterDestroy(&baij->Mvctx));
142d9653453SSatish Balay   if (baij->colmap) {
143aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
144eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&baij->colmap));
14548e59246SSatish Balay #else
1469566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
14748e59246SSatish Balay #endif
1488016bdd1SSatish Balay   }
1498016bdd1SSatish Balay 
1508016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1517c612885SStefano Zampini   if (B) {
1529566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1539566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1547c612885SStefano Zampini     Bbaij = (Mat_SeqBAIJ *)B->data;
1557c612885SStefano Zampini     mbs   = Bbaij->mbs;
1567c612885SStefano Zampini     a     = Bbaij->a;
1578016bdd1SSatish Balay 
1588016bdd1SSatish Balay     /* invent new B and copy stuff over */
1599566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(mbs, &nz));
160ad540459SPierre Jolivet     for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
1619566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
1629566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bnew, m, n, m, n));
1639566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
1649566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
16577341eacSDmitry Karpeev     /*
16677341eacSDmitry Karpeev      Ensure that B's nonzerostate is monotonically increasing.
16777341eacSDmitry Karpeev      Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
16877341eacSDmitry Karpeev      */
16977341eacSDmitry Karpeev     Bnew->nonzerostate = B->nonzerostate;
1707c612885SStefano Zampini     if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
1717c612885SStefano Zampini       ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
1727c612885SStefano Zampini     }
173d9653453SSatish Balay 
1747c612885SStefano Zampini     PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
175bba1ac68SSatish Balay     for (i = 0; i < mbs; i++) {
176bba1ac68SSatish Balay       for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
177bba1ac68SSatish Balay         col  = garray[Bbaij->j[j]];
1783eda8832SBarry Smith         atmp = a + j * bs2;
1799566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
1808016bdd1SSatish Balay       }
1818016bdd1SSatish Balay     }
1829566063dSJacob Faibussowitsch     PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));
183bba1ac68SSatish Balay 
1849566063dSJacob Faibussowitsch     PetscCall(PetscFree(nz));
1859566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
18626fbe8dcSKarl Rupp 
187d9653453SSatish Balay     baij->B = Bnew;
1887c612885SStefano Zampini   }
1897c612885SStefano Zampini   PetscCall(PetscFree(baij->garray));
1908016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1916a719282SBarry Smith   A->assembled     = PETSC_FALSE;
1923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1938016bdd1SSatish Balay }
1948016bdd1SSatish Balay 
19504f1ad80SBarry Smith /*      ugly stuff added for Glenn someday we should fix this up */
19604f1ad80SBarry Smith 
197f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
198f4259b30SLisandro Dalcin static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
19904f1ad80SBarry Smith 
200*ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
201d71ae5a4SJacob Faibussowitsch {
20204f1ad80SBarry Smith   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
20304f1ad80SBarry Smith   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
204d0f46423SBarry Smith   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
205b24ad042SBarry Smith   PetscInt    *r_rmapd, *r_rmapo;
20604f1ad80SBarry Smith 
20704f1ad80SBarry Smith   PetscFunctionBegin;
2089566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2099566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2109566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
21104f1ad80SBarry Smith   nt = 0;
21245b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
21345b6f7e9SBarry Smith     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
21404f1ad80SBarry Smith       nt++;
21545b6f7e9SBarry Smith       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
21604f1ad80SBarry Smith     }
21704f1ad80SBarry Smith   }
21808401ef6SPierre Jolivet   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
2199566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
22045b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
22104f1ad80SBarry Smith     if (r_rmapd[i]) {
222ad540459SPierre Jolivet       for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
22304f1ad80SBarry Smith     }
22404f1ad80SBarry Smith   }
2259566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2269566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
22704f1ad80SBarry Smith 
2289566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
229ad540459SPierre Jolivet   for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
23045b6f7e9SBarry Smith   no = inA->rmap->mapping->n - nt;
2319566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
23204f1ad80SBarry Smith   nt = 0;
23345b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
23445b6f7e9SBarry Smith     if (lindices[inA->rmap->mapping->indices[i]]) {
23504f1ad80SBarry Smith       nt++;
23645b6f7e9SBarry Smith       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
23704f1ad80SBarry Smith     }
23804f1ad80SBarry Smith   }
23908401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2409566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
24245b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
24304f1ad80SBarry Smith     if (r_rmapo[i]) {
244ad540459SPierre Jolivet       for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
24504f1ad80SBarry Smith     }
24604f1ad80SBarry Smith   }
2479566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2489566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
2493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25004f1ad80SBarry Smith }
25104f1ad80SBarry Smith 
252d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
253d71ae5a4SJacob Faibussowitsch {
25404f1ad80SBarry Smith   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
255b24ad042SBarry Smith   PetscInt           n, i;
256bca11509SBarry Smith   PetscScalar       *d, *o;
257bca11509SBarry Smith   const PetscScalar *s;
25804f1ad80SBarry Smith 
25904f1ad80SBarry Smith   PetscFunctionBegin;
26048a46eb9SPierre Jolivet   if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));
2612cd6534aSBarry Smith 
2629566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
26304f1ad80SBarry Smith 
2649566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(uglydd, &n));
2659566063dSJacob Faibussowitsch   PetscCall(VecGetArray(uglydd, &d));
2669371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(uglydd, &d));
26804f1ad80SBarry Smith   /* column scale "diagonal" portion of local matrix */
2699566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
27004f1ad80SBarry Smith 
2719566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(uglyoo, &n));
2729566063dSJacob Faibussowitsch   PetscCall(VecGetArray(uglyoo, &o));
2739371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(uglyoo, &o));
27604f1ad80SBarry Smith   /* column scale "off-diagonal" portion of local matrix */
2779566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
2783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
27904f1ad80SBarry Smith }
280