xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1be1d678aSKris Buschelman 
28016bdd1SSatish Balay /*
3d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
48016bdd1SSatish Balay */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h>
6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7bba1ac68SSatish Balay 
89371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) {
9d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10d9653453SSatish Balay   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
11b24ad042SBarry Smith   PetscInt     i, j, *aj = B->j, ec = 0, *garray;
12d0f46423SBarry Smith   PetscInt     bs = mat->rmap->bs, *stmp;
138016bdd1SSatish Balay   IS           from, to;
148016bdd1SSatish Balay   Vec          gvec;
15aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
160f5bd95cSBarry Smith   PetscTable         gid1_lid1;
170f5bd95cSBarry Smith   PetscTablePosition tpos;
18b24ad042SBarry Smith   PetscInt           gid, lid;
196f531f54SSatish Balay #else
20b24ad042SBarry Smith   PetscInt Nbs = baij->Nbs, *indices;
2173a2e727SSatish Balay #endif
228016bdd1SSatish Balay 
233a40ed3dSBarry Smith   PetscFunctionBegin;
24aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2573a2e727SSatish Balay   /* use a table - Mark Adams */
269566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1));
2773a2e727SSatish Balay   for (i = 0; i < B->mbs; i++) {
2873a2e727SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
29b24ad042SBarry Smith       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
309566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
31fa46199cSSatish Balay       if (!data) {
3273a2e727SSatish Balay         /* one based table */
339566063dSJacob Faibussowitsch         PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
3473a2e727SSatish Balay       }
3573a2e727SSatish Balay     }
3673a2e727SSatish Balay   }
3773a2e727SSatish Balay   /* form array of columns we need */
389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
399566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
4073a2e727SSatish Balay   while (tpos) {
419566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
429371c9d4SSatish Balay     gid--;
439371c9d4SSatish Balay     lid--;
4473a2e727SSatish Balay     garray[lid] = gid;
4573a2e727SSatish Balay   }
469566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
479566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
4848a46eb9SPierre Jolivet   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
4973a2e727SSatish Balay   /* compact out the extra columns in B */
5073a2e727SSatish Balay   for (i = 0; i < B->mbs; i++) {
5173a2e727SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
52b24ad042SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
539566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
54fa46199cSSatish Balay       lid--;
5573a2e727SSatish Balay       aj[B->i[i] + j] = lid;
5673a2e727SSatish Balay     }
5773a2e727SSatish Balay   }
5873a2e727SSatish Balay   B->nbs = ec;
599566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
609566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
619566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
6273a2e727SSatish Balay #else
63435da068SBarry Smith   /* Make an array as long as the number of columns */
64d9653453SSatish Balay   /* mark those columns that are in baij->B */
659566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
66d9653453SSatish Balay   for (i = 0; i < B->mbs; i++) {
678016bdd1SSatish Balay     for (j = 0; j < B->ilen[i]; j++) {
68d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
69d9653453SSatish Balay       indices[aj[B->i[i] + j]] = 1;
708016bdd1SSatish Balay     }
718016bdd1SSatish Balay   }
728016bdd1SSatish Balay 
738016bdd1SSatish Balay   /* form array of columns we need */
749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
758016bdd1SSatish Balay   ec = 0;
76d9653453SSatish Balay   for (i = 0; i < Nbs; i++) {
77ad540459SPierre Jolivet     if (indices[i]) garray[ec++] = i;
788016bdd1SSatish Balay   }
798016bdd1SSatish Balay 
808016bdd1SSatish Balay   /* make indices now point into garray */
81ad540459SPierre Jolivet   for (i = 0; i < ec; i++) indices[garray[i]] = i;
828016bdd1SSatish Balay 
838016bdd1SSatish Balay   /* compact out the extra columns in B */
84d9653453SSatish Balay   for (i = 0; i < B->mbs; i++) {
85ad540459SPierre Jolivet     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
868016bdd1SSatish Balay   }
87d9653453SSatish Balay   B->nbs = ec;
889566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&baij->B->cmap));
899566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
909566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
9173a2e727SSatish Balay #endif
928016bdd1SSatish Balay 
938016bdd1SSatish Balay   /* create local vector that is used to scatter into */
949566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));
958016bdd1SSatish Balay 
96c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
979566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
98c16cb8f2SBarry Smith 
999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &stmp));
10026fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1019566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
1028016bdd1SSatish Balay 
1038016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1049566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1058016bdd1SSatish Balay 
1069566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
1079566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
10890f02eecSBarry Smith 
109d9653453SSatish Balay   baij->garray = garray;
11026fbe8dcSKarl Rupp 
1119566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1129566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1139566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
1143a40ed3dSBarry Smith   PetscFunctionReturn(0);
1158016bdd1SSatish Balay }
1168016bdd1SSatish Balay 
1178016bdd1SSatish Balay /*
118d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1198016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1208016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1218016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1228016bdd1SSatish Balay 
1238016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1248016bdd1SSatish Balay    they are sloppy.
1258016bdd1SSatish Balay */
1269371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) {
127d9653453SSatish Balay   Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
128d9653453SSatish Balay   Mat          B     = baij->B, Bnew;
129d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
130d0f46423SBarry Smith   PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
131*4dfa11a4SJacob Faibussowitsch   PetscInt     bs2 = baij->bs2, *nz, m = A->rmap->n;
1323eda8832SBarry Smith   MatScalar   *a = Bbaij->a;
133dd6ea824SBarry Smith   MatScalar   *atmp;
13497e5c40aSBarry Smith 
1353a40ed3dSBarry Smith   PetscFunctionBegin;
1368016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
1379371c9d4SSatish Balay   PetscCall(VecDestroy(&baij->lvec));
1389371c9d4SSatish Balay   baij->lvec = NULL;
1399371c9d4SSatish Balay   PetscCall(VecScatterDestroy(&baij->Mvctx));
1409371c9d4SSatish Balay   baij->Mvctx = NULL;
141d9653453SSatish Balay   if (baij->colmap) {
142aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
1439566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&baij->colmap));
14448e59246SSatish Balay #else
1459566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
14648e59246SSatish Balay #endif
1478016bdd1SSatish Balay   }
1488016bdd1SSatish Balay 
1498016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1509566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1528016bdd1SSatish Balay 
1538016bdd1SSatish Balay   /* invent new B and copy stuff over */
1549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &nz));
155ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
1569566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
1579566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, m, n, m, n));
1589566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
1599566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
160b38c15b3SStefano Zampini   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
161b38c15b3SStefano Zampini     ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
162b38c15b3SStefano Zampini   }
16326fbe8dcSKarl Rupp 
1649566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
16577341eacSDmitry Karpeev   /*
16677341eacSDmitry Karpeev    Ensure that B's nonzerostate is monotonically increasing.
16777341eacSDmitry Karpeev    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
16877341eacSDmitry Karpeev    */
16977341eacSDmitry Karpeev   Bnew->nonzerostate = B->nonzerostate;
170d9653453SSatish Balay 
171bba1ac68SSatish Balay   for (i = 0; i < mbs; i++) {
172bba1ac68SSatish Balay     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
173bba1ac68SSatish Balay       col  = garray[Bbaij->j[j]];
1743eda8832SBarry Smith       atmp = a + j * bs2;
1759566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
1768016bdd1SSatish Balay     }
1778016bdd1SSatish Balay   }
1789566063dSJacob Faibussowitsch   PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));
179bba1ac68SSatish Balay 
1809566063dSJacob Faibussowitsch   PetscCall(PetscFree(nz));
1819566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
1829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
18326fbe8dcSKarl Rupp 
184d9653453SSatish Balay   baij->B          = Bnew;
1858016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
1866a719282SBarry Smith   A->assembled     = PETSC_FALSE;
1873a40ed3dSBarry Smith   PetscFunctionReturn(0);
1888016bdd1SSatish Balay }
1898016bdd1SSatish Balay 
19004f1ad80SBarry Smith /*      ugly stuff added for Glenn someday we should fix this up */
19104f1ad80SBarry Smith 
192f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
193f4259b30SLisandro Dalcin static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
19404f1ad80SBarry Smith 
1959371c9d4SSatish Balay PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
19604f1ad80SBarry Smith   Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */
19704f1ad80SBarry Smith   Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
198d0f46423SBarry Smith   PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
199b24ad042SBarry Smith   PetscInt    *r_rmapd, *r_rmapo;
20004f1ad80SBarry Smith 
20104f1ad80SBarry Smith   PetscFunctionBegin;
2029566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2039566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2049566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
20504f1ad80SBarry Smith   nt = 0;
20645b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
20745b6f7e9SBarry Smith     if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
20804f1ad80SBarry Smith       nt++;
20945b6f7e9SBarry Smith       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
21004f1ad80SBarry Smith     }
21104f1ad80SBarry Smith   }
21208401ef6SPierre Jolivet   PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
2139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
21445b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
21504f1ad80SBarry Smith     if (r_rmapd[i]) {
216ad540459SPierre Jolivet       for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
21704f1ad80SBarry Smith     }
21804f1ad80SBarry Smith   }
2199566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2209566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));
22104f1ad80SBarry Smith 
2229566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
223ad540459SPierre Jolivet   for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
22445b6f7e9SBarry Smith   no = inA->rmap->mapping->n - nt;
2259566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
22604f1ad80SBarry Smith   nt = 0;
22745b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
22845b6f7e9SBarry Smith     if (lindices[inA->rmap->mapping->indices[i]]) {
22904f1ad80SBarry Smith       nt++;
23045b6f7e9SBarry Smith       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
23104f1ad80SBarry Smith     }
23204f1ad80SBarry Smith   }
23308401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2349566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2359566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
23645b6f7e9SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
23704f1ad80SBarry Smith     if (r_rmapo[i]) {
238ad540459SPierre Jolivet       for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
23904f1ad80SBarry Smith     }
24004f1ad80SBarry Smith   }
2419566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2429566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
24304f1ad80SBarry Smith   PetscFunctionReturn(0);
24404f1ad80SBarry Smith }
24504f1ad80SBarry Smith 
2469371c9d4SSatish Balay PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) {
24792b32695SKris Buschelman   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
24892b32695SKris Buschelman 
24992b32695SKris Buschelman   PetscFunctionBegin;
250cac4c232SBarry Smith   PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
25192b32695SKris Buschelman   PetscFunctionReturn(0);
25292b32695SKris Buschelman }
25392b32695SKris Buschelman 
2549371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) {
25504f1ad80SBarry Smith   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */
256b24ad042SBarry Smith   PetscInt           n, i;
257bca11509SBarry Smith   PetscScalar       *d, *o;
258bca11509SBarry Smith   const PetscScalar *s;
25904f1ad80SBarry Smith 
26004f1ad80SBarry Smith   PetscFunctionBegin;
26148a46eb9SPierre Jolivet   if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));
2622cd6534aSBarry Smith 
2639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
26404f1ad80SBarry Smith 
2659566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(uglydd, &n));
2669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(uglydd, &d));
2679371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(uglydd, &d));
26904f1ad80SBarry Smith   /* column scale "diagonal" portion of local matrix */
2709566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, uglydd));
27104f1ad80SBarry Smith 
2729566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(uglyoo, &n));
2739566063dSJacob Faibussowitsch   PetscCall(VecGetArray(uglyoo, &o));
2749371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(uglyoo, &o));
27704f1ad80SBarry Smith   /* column scale "off-diagonal" portion of local matrix */
2789566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
27904f1ad80SBarry Smith   PetscFunctionReturn(0);
28004f1ad80SBarry Smith }
281