1be1d678aSKris Buschelman 28016bdd1SSatish Balay /* 3d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 48016bdd1SSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> 6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7bba1ac68SSatish Balay 8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9d71ae5a4SJacob Faibussowitsch { 10d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 11d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(baij->B->data); 12b24ad042SBarry Smith PetscInt i, j, *aj = B->j, ec = 0, *garray; 13d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp; 148016bdd1SSatish Balay IS from, to; 158016bdd1SSatish Balay Vec gvec; 16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 17*eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; 18*eec179cfSJacob Faibussowitsch PetscHashIter tpos; 19b24ad042SBarry Smith PetscInt gid, lid; 206f531f54SSatish Balay #else 21b24ad042SBarry Smith PetscInt Nbs = baij->Nbs, *indices; 2273a2e727SSatish Balay #endif 238016bdd1SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 25aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2673a2e727SSatish Balay /* use a table - Mark Adams */ 27*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1)); 2873a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 2973a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 30b24ad042SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 31*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 32fa46199cSSatish Balay if (!data) { 3373a2e727SSatish Balay /* one based table */ 34*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapISetWithMode(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 3573a2e727SSatish Balay } 3673a2e727SSatish Balay } 3773a2e727SSatish Balay } 3873a2e727SSatish Balay /* form array of columns we need */ 399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 40*eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 41*eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 42*eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 43*eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 44*eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 459371c9d4SSatish Balay gid--; 469371c9d4SSatish Balay lid--; 4773a2e727SSatish Balay garray[lid] = gid; 4873a2e727SSatish Balay } 499566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 50*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 51*eec179cfSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISetWithMode(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 5273a2e727SSatish Balay /* compact out the extra columns in B */ 5373a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 5473a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 55b24ad042SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 56*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 57fa46199cSSatish Balay lid--; 5873a2e727SSatish Balay aj[B->i[i] + j] = lid; 5973a2e727SSatish Balay } 6073a2e727SSatish Balay } 6173a2e727SSatish Balay B->nbs = ec; 629566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 639566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 64*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 6573a2e727SSatish Balay #else 66435da068SBarry Smith /* Make an array as long as the number of columns */ 67d9653453SSatish Balay /* mark those columns that are in baij->B */ 689566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 69d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 708016bdd1SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 71d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 72d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1; 738016bdd1SSatish Balay } 748016bdd1SSatish Balay } 758016bdd1SSatish Balay 768016bdd1SSatish Balay /* form array of columns we need */ 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 788016bdd1SSatish Balay ec = 0; 79d9653453SSatish Balay for (i = 0; i < Nbs; i++) { 80ad540459SPierre Jolivet if (indices[i]) garray[ec++] = i; 818016bdd1SSatish Balay } 828016bdd1SSatish Balay 838016bdd1SSatish Balay /* make indices now point into garray */ 84ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 858016bdd1SSatish Balay 868016bdd1SSatish Balay /* compact out the extra columns in B */ 87d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 88ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 898016bdd1SSatish Balay } 90d9653453SSatish Balay B->nbs = ec; 919566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 929566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 939566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 9473a2e727SSatish Balay #endif 958016bdd1SSatish Balay 968016bdd1SSatish Balay /* create local vector that is used to scatter into */ 979566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec)); 988016bdd1SSatish Balay 99c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 1009566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 101c16cb8f2SBarry Smith 1029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &stmp)); 10326fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1049566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to)); 1058016bdd1SSatish Balay 1068016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1079566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1088016bdd1SSatish Balay 1099566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx)); 1109566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 11190f02eecSBarry Smith 112d9653453SSatish Balay baij->garray = garray; 11326fbe8dcSKarl Rupp 1149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1159566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1169566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1173a40ed3dSBarry Smith PetscFunctionReturn(0); 1188016bdd1SSatish Balay } 1198016bdd1SSatish Balay 1208016bdd1SSatish Balay /* 121d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1228016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1238016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1248016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1258016bdd1SSatish Balay 1268016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1278016bdd1SSatish Balay they are sloppy. 1288016bdd1SSatish Balay */ 129d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 130d71ae5a4SJacob Faibussowitsch { 131d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 132d9653453SSatish Balay Mat B = baij->B, Bnew; 133d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 134d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 1354dfa11a4SJacob Faibussowitsch PetscInt bs2 = baij->bs2, *nz, m = A->rmap->n; 1363eda8832SBarry Smith MatScalar *a = Bbaij->a; 137dd6ea824SBarry Smith MatScalar *atmp; 13897e5c40aSBarry Smith 1393a40ed3dSBarry Smith PetscFunctionBegin; 1408016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 1419371c9d4SSatish Balay PetscCall(VecDestroy(&baij->lvec)); 1429371c9d4SSatish Balay baij->lvec = NULL; 1439371c9d4SSatish Balay PetscCall(VecScatterDestroy(&baij->Mvctx)); 1449371c9d4SSatish Balay baij->Mvctx = NULL; 145d9653453SSatish Balay if (baij->colmap) { 146aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 147*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap)); 14848e59246SSatish Balay #else 1499566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 15048e59246SSatish Balay #endif 1518016bdd1SSatish Balay } 1528016bdd1SSatish Balay 1538016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1549566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1559566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1568016bdd1SSatish Balay 1578016bdd1SSatish Balay /* invent new B and copy stuff over */ 1589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 159ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 1609566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew)); 1619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 1629566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1639566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 164b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 165b38c15b3SStefano Zampini ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 166b38c15b3SStefano Zampini } 16726fbe8dcSKarl Rupp 1689566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE)); 16977341eacSDmitry Karpeev /* 17077341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 17177341eacSDmitry Karpeev Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17277341eacSDmitry Karpeev */ 17377341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 174d9653453SSatish Balay 175bba1ac68SSatish Balay for (i = 0; i < mbs; i++) { 176bba1ac68SSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 177bba1ac68SSatish Balay col = garray[Bbaij->j[j]]; 1783eda8832SBarry Smith atmp = a + j * bs2; 1799566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode)); 1808016bdd1SSatish Balay } 1818016bdd1SSatish Balay } 1829566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE)); 183bba1ac68SSatish Balay 1849566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1859566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 1869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 18726fbe8dcSKarl Rupp 188d9653453SSatish Balay baij->B = Bnew; 1898016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1906a719282SBarry Smith A->assembled = PETSC_FALSE; 1913a40ed3dSBarry Smith PetscFunctionReturn(0); 1928016bdd1SSatish Balay } 1938016bdd1SSatish Balay 19404f1ad80SBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 19504f1ad80SBarry Smith 196f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 197f4259b30SLisandro Dalcin static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 19804f1ad80SBarry Smith 199d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 200d71ae5a4SJacob Faibussowitsch { 20104f1ad80SBarry Smith Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */ 20204f1ad80SBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data; 203d0f46423SBarry Smith PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices; 204b24ad042SBarry Smith PetscInt *r_rmapd, *r_rmapo; 20504f1ad80SBarry Smith 20604f1ad80SBarry Smith PetscFunctionBegin; 2079566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2089566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2099566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 21004f1ad80SBarry Smith nt = 0; 21145b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 21245b6f7e9SBarry Smith if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) { 21304f1ad80SBarry Smith nt++; 21445b6f7e9SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 21504f1ad80SBarry Smith } 21604f1ad80SBarry Smith } 21708401ef6SPierre Jolivet PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n); 2189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &uglyrmapd)); 21945b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 22004f1ad80SBarry Smith if (r_rmapd[i]) { 221ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; 22204f1ad80SBarry Smith } 22304f1ad80SBarry Smith } 2249566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2259566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd)); 22604f1ad80SBarry Smith 2279566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices)); 228ad540459SPierre Jolivet for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1; 22945b6f7e9SBarry Smith no = inA->rmap->mapping->n - nt; 2309566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 23104f1ad80SBarry Smith nt = 0; 23245b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 23345b6f7e9SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 23404f1ad80SBarry Smith nt++; 23545b6f7e9SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 23604f1ad80SBarry Smith } 23704f1ad80SBarry Smith } 23808401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2399566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo)); 24145b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 24204f1ad80SBarry Smith if (r_rmapo[i]) { 243ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; 24404f1ad80SBarry Smith } 24504f1ad80SBarry Smith } 2469566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2479566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo)); 24804f1ad80SBarry Smith PetscFunctionReturn(0); 24904f1ad80SBarry Smith } 25004f1ad80SBarry Smith 251d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) 252d71ae5a4SJacob Faibussowitsch { 25392b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 25492b32695SKris Buschelman 25592b32695SKris Buschelman PetscFunctionBegin; 256cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 25792b32695SKris Buschelman PetscFunctionReturn(0); 25892b32695SKris Buschelman } 25992b32695SKris Buschelman 260d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) 261d71ae5a4SJacob Faibussowitsch { 26204f1ad80SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */ 263b24ad042SBarry Smith PetscInt n, i; 264bca11509SBarry Smith PetscScalar *d, *o; 265bca11509SBarry Smith const PetscScalar *s; 26604f1ad80SBarry Smith 26704f1ad80SBarry Smith PetscFunctionBegin; 26848a46eb9SPierre Jolivet if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); 2692cd6534aSBarry Smith 2709566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 27104f1ad80SBarry Smith 2729566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglydd, &n)); 2739566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglydd, &d)); 2749371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglydd, &d)); 27604f1ad80SBarry Smith /* column scale "diagonal" portion of local matrix */ 2779566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, uglydd)); 27804f1ad80SBarry Smith 2799566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglyoo, &n)); 2809566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglyoo, &o)); 2819371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2829566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglyoo, &o)); 28404f1ad80SBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2859566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, uglyoo)); 28604f1ad80SBarry Smith PetscFunctionReturn(0); 28704f1ad80SBarry Smith } 288