1be1d678aSKris Buschelman 28016bdd1SSatish Balay /* 3d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 48016bdd1SSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> 6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7bba1ac68SSatish Balay 89371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) { 9d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(baij->B->data); 11b24ad042SBarry Smith PetscInt i, j, *aj = B->j, ec = 0, *garray; 12d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp; 138016bdd1SSatish Balay IS from, to; 148016bdd1SSatish Balay Vec gvec; 15aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 160f5bd95cSBarry Smith PetscTable gid1_lid1; 170f5bd95cSBarry Smith PetscTablePosition tpos; 18b24ad042SBarry Smith PetscInt gid, lid; 196f531f54SSatish Balay #else 20b24ad042SBarry Smith PetscInt Nbs = baij->Nbs, *indices; 2173a2e727SSatish Balay #endif 228016bdd1SSatish Balay 233a40ed3dSBarry Smith PetscFunctionBegin; 24aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2573a2e727SSatish Balay /* use a table - Mark Adams */ 269566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1)); 2773a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 2873a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 29b24ad042SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 309566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 31fa46199cSSatish Balay if (!data) { 3273a2e727SSatish Balay /* one based table */ 339566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 3473a2e727SSatish Balay } 3573a2e727SSatish Balay } 3673a2e727SSatish Balay } 3773a2e727SSatish Balay /* form array of columns we need */ 389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 399566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 4073a2e727SSatish Balay while (tpos) { 419566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 429371c9d4SSatish Balay gid--; 439371c9d4SSatish Balay lid--; 4473a2e727SSatish Balay garray[lid] = gid; 4573a2e727SSatish Balay } 469566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 479566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 4848a46eb9SPierre Jolivet for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 4973a2e727SSatish Balay /* compact out the extra columns in B */ 5073a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 5173a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 52b24ad042SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 539566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 54fa46199cSSatish Balay lid--; 5573a2e727SSatish Balay aj[B->i[i] + j] = lid; 5673a2e727SSatish Balay } 5773a2e727SSatish Balay } 5873a2e727SSatish Balay B->nbs = ec; 599566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 609566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 619566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 6273a2e727SSatish Balay #else 63435da068SBarry Smith /* Make an array as long as the number of columns */ 64d9653453SSatish Balay /* mark those columns that are in baij->B */ 659566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 66d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 678016bdd1SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 68d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 69d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1; 708016bdd1SSatish Balay } 718016bdd1SSatish Balay } 728016bdd1SSatish Balay 738016bdd1SSatish Balay /* form array of columns we need */ 749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 758016bdd1SSatish Balay ec = 0; 76d9653453SSatish Balay for (i = 0; i < Nbs; i++) { 77ad540459SPierre Jolivet if (indices[i]) garray[ec++] = i; 788016bdd1SSatish Balay } 798016bdd1SSatish Balay 808016bdd1SSatish Balay /* make indices now point into garray */ 81ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 828016bdd1SSatish Balay 838016bdd1SSatish Balay /* compact out the extra columns in B */ 84d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 85ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 868016bdd1SSatish Balay } 87d9653453SSatish Balay B->nbs = ec; 889566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 899566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 909566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 9173a2e727SSatish Balay #endif 928016bdd1SSatish Balay 938016bdd1SSatish Balay /* create local vector that is used to scatter into */ 949566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec)); 958016bdd1SSatish Balay 96c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 979566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 98c16cb8f2SBarry Smith 999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &stmp)); 10026fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1019566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to)); 1028016bdd1SSatish Balay 1038016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1049566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1058016bdd1SSatish Balay 1069566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx)); 1079566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 10890f02eecSBarry Smith 109d9653453SSatish Balay baij->garray = garray; 11026fbe8dcSKarl Rupp 1119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1139566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1143a40ed3dSBarry Smith PetscFunctionReturn(0); 1158016bdd1SSatish Balay } 1168016bdd1SSatish Balay 1178016bdd1SSatish Balay /* 118d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1198016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1208016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1218016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1228016bdd1SSatish Balay 1238016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1248016bdd1SSatish Balay they are sloppy. 1258016bdd1SSatish Balay */ 1269371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) { 127d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 128d9653453SSatish Balay Mat B = baij->B, Bnew; 129d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 130d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 131*4dfa11a4SJacob Faibussowitsch PetscInt bs2 = baij->bs2, *nz, m = A->rmap->n; 1323eda8832SBarry Smith MatScalar *a = Bbaij->a; 133dd6ea824SBarry Smith MatScalar *atmp; 13497e5c40aSBarry Smith 1353a40ed3dSBarry Smith PetscFunctionBegin; 1368016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 1379371c9d4SSatish Balay PetscCall(VecDestroy(&baij->lvec)); 1389371c9d4SSatish Balay baij->lvec = NULL; 1399371c9d4SSatish Balay PetscCall(VecScatterDestroy(&baij->Mvctx)); 1409371c9d4SSatish Balay baij->Mvctx = NULL; 141d9653453SSatish Balay if (baij->colmap) { 142aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 1439566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&baij->colmap)); 14448e59246SSatish Balay #else 1459566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 14648e59246SSatish Balay #endif 1478016bdd1SSatish Balay } 1488016bdd1SSatish Balay 1498016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1509566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1519566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1528016bdd1SSatish Balay 1538016bdd1SSatish Balay /* invent new B and copy stuff over */ 1549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 155ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 1569566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew)); 1579566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 1589566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1599566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 160b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 161b38c15b3SStefano Zampini ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 162b38c15b3SStefano Zampini } 16326fbe8dcSKarl Rupp 1649566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE)); 16577341eacSDmitry Karpeev /* 16677341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 16777341eacSDmitry Karpeev Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 16877341eacSDmitry Karpeev */ 16977341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 170d9653453SSatish Balay 171bba1ac68SSatish Balay for (i = 0; i < mbs; i++) { 172bba1ac68SSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 173bba1ac68SSatish Balay col = garray[Bbaij->j[j]]; 1743eda8832SBarry Smith atmp = a + j * bs2; 1759566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode)); 1768016bdd1SSatish Balay } 1778016bdd1SSatish Balay } 1789566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE)); 179bba1ac68SSatish Balay 1809566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1819566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 1829566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 18326fbe8dcSKarl Rupp 184d9653453SSatish Balay baij->B = Bnew; 1858016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1866a719282SBarry Smith A->assembled = PETSC_FALSE; 1873a40ed3dSBarry Smith PetscFunctionReturn(0); 1888016bdd1SSatish Balay } 1898016bdd1SSatish Balay 19004f1ad80SBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 19104f1ad80SBarry Smith 192f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 193f4259b30SLisandro Dalcin static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 19404f1ad80SBarry Smith 1959371c9d4SSatish Balay PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) { 19604f1ad80SBarry Smith Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */ 19704f1ad80SBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data; 198d0f46423SBarry Smith PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices; 199b24ad042SBarry Smith PetscInt *r_rmapd, *r_rmapo; 20004f1ad80SBarry Smith 20104f1ad80SBarry Smith PetscFunctionBegin; 2029566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2039566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2049566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 20504f1ad80SBarry Smith nt = 0; 20645b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 20745b6f7e9SBarry Smith if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) { 20804f1ad80SBarry Smith nt++; 20945b6f7e9SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 21004f1ad80SBarry Smith } 21104f1ad80SBarry Smith } 21208401ef6SPierre Jolivet PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n); 2139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &uglyrmapd)); 21445b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 21504f1ad80SBarry Smith if (r_rmapd[i]) { 216ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; 21704f1ad80SBarry Smith } 21804f1ad80SBarry Smith } 2199566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2209566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd)); 22104f1ad80SBarry Smith 2229566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices)); 223ad540459SPierre Jolivet for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1; 22445b6f7e9SBarry Smith no = inA->rmap->mapping->n - nt; 2259566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 22604f1ad80SBarry Smith nt = 0; 22745b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 22845b6f7e9SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 22904f1ad80SBarry Smith nt++; 23045b6f7e9SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 23104f1ad80SBarry Smith } 23204f1ad80SBarry Smith } 23308401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2349566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo)); 23645b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 23704f1ad80SBarry Smith if (r_rmapo[i]) { 238ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; 23904f1ad80SBarry Smith } 24004f1ad80SBarry Smith } 2419566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2429566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo)); 24304f1ad80SBarry Smith PetscFunctionReturn(0); 24404f1ad80SBarry Smith } 24504f1ad80SBarry Smith 2469371c9d4SSatish Balay PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) { 24792b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 24892b32695SKris Buschelman 24992b32695SKris Buschelman PetscFunctionBegin; 250cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 25192b32695SKris Buschelman PetscFunctionReturn(0); 25292b32695SKris Buschelman } 25392b32695SKris Buschelman 2549371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) { 25504f1ad80SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */ 256b24ad042SBarry Smith PetscInt n, i; 257bca11509SBarry Smith PetscScalar *d, *o; 258bca11509SBarry Smith const PetscScalar *s; 25904f1ad80SBarry Smith 26004f1ad80SBarry Smith PetscFunctionBegin; 26148a46eb9SPierre Jolivet if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); 2622cd6534aSBarry Smith 2639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 26404f1ad80SBarry Smith 2659566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglydd, &n)); 2669566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglydd, &d)); 2679371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2689566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglydd, &d)); 26904f1ad80SBarry Smith /* column scale "diagonal" portion of local matrix */ 2709566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, uglydd)); 27104f1ad80SBarry Smith 2729566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglyoo, &n)); 2739566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglyoo, &o)); 2749371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2759566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglyoo, &o)); 27704f1ad80SBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2789566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, uglyoo)); 27904f1ad80SBarry Smith PetscFunctionReturn(0); 28004f1ad80SBarry Smith } 281