1be1d678aSKris Buschelman 28016bdd1SSatish Balay /* 3d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 48016bdd1SSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> 6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7bba1ac68SSatish Balay 8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9d71ae5a4SJacob Faibussowitsch { 10d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 11d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(baij->B->data); 12b24ad042SBarry Smith PetscInt i, j, *aj = B->j, ec = 0, *garray; 13d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp; 148016bdd1SSatish Balay IS from, to; 158016bdd1SSatish Balay Vec gvec; 16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 17eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; 18eec179cfSJacob Faibussowitsch PetscHashIter tpos; 19b24ad042SBarry Smith PetscInt gid, lid; 206f531f54SSatish Balay #else 21b24ad042SBarry Smith PetscInt Nbs = baij->Nbs, *indices; 2273a2e727SSatish Balay #endif 238016bdd1SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 25aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2673a2e727SSatish Balay /* use a table - Mark Adams */ 27eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1)); 2873a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 2973a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 30b24ad042SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 31eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 32fa46199cSSatish Balay if (!data) { 3373a2e727SSatish Balay /* one based table */ 34c76ffc5fSJacob Faibussowitsch PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec)); 3573a2e727SSatish Balay } 3673a2e727SSatish Balay } 3773a2e727SSatish Balay } 3873a2e727SSatish Balay /* form array of columns we need */ 399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 40eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 41eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 42eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 43eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 44eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 459371c9d4SSatish Balay gid--; 469371c9d4SSatish Balay lid--; 4773a2e727SSatish Balay garray[lid] = gid; 4873a2e727SSatish Balay } 499566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 50eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 51c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1)); 5273a2e727SSatish Balay /* compact out the extra columns in B */ 5373a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 5473a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 55b24ad042SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 56eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 57fa46199cSSatish Balay lid--; 5873a2e727SSatish Balay aj[B->i[i] + j] = lid; 5973a2e727SSatish Balay } 6073a2e727SSatish Balay } 6173a2e727SSatish Balay B->nbs = ec; 629566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 639566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 64eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 6573a2e727SSatish Balay #else 66435da068SBarry Smith /* Make an array as long as the number of columns */ 67d9653453SSatish Balay /* mark those columns that are in baij->B */ 689566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 69d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 708016bdd1SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 71d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 72d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1; 738016bdd1SSatish Balay } 748016bdd1SSatish Balay } 758016bdd1SSatish Balay 768016bdd1SSatish Balay /* form array of columns we need */ 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 788016bdd1SSatish Balay ec = 0; 79d9653453SSatish Balay for (i = 0; i < Nbs; i++) { 80ad540459SPierre Jolivet if (indices[i]) garray[ec++] = i; 818016bdd1SSatish Balay } 828016bdd1SSatish Balay 838016bdd1SSatish Balay /* make indices now point into garray */ 84ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 858016bdd1SSatish Balay 868016bdd1SSatish Balay /* compact out the extra columns in B */ 87d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 88ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 898016bdd1SSatish Balay } 90d9653453SSatish Balay B->nbs = ec; 919566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 929566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 939566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 9473a2e727SSatish Balay #endif 958016bdd1SSatish Balay 968016bdd1SSatish Balay /* create local vector that is used to scatter into */ 979566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec)); 988016bdd1SSatish Balay 99c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 1009566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 101c16cb8f2SBarry Smith 1029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &stmp)); 10326fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1049566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to)); 1058016bdd1SSatish Balay 1068016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1079566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1088016bdd1SSatish Balay 1099566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx)); 1109566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 11190f02eecSBarry Smith 112d9653453SSatish Balay baij->garray = garray; 11326fbe8dcSKarl Rupp 1149566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1159566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1169566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1188016bdd1SSatish Balay } 1198016bdd1SSatish Balay 1208016bdd1SSatish Balay /* 121d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1228016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1238016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1248016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1258016bdd1SSatish Balay 1268016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1278016bdd1SSatish Balay they are sloppy. 1288016bdd1SSatish Balay */ 129d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 130d71ae5a4SJacob Faibussowitsch { 131d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 132d9653453SSatish Balay Mat B = baij->B, Bnew; 1337c612885SStefano Zampini Mat_SeqBAIJ *Bbaij; 1347c612885SStefano Zampini PetscInt i, j, mbs, n = A->cmap->N, col, *garray = baij->garray; 1357c612885SStefano Zampini PetscInt bs2 = baij->bs2, *nz = NULL, m = A->rmap->n; 1367c612885SStefano Zampini MatScalar *a, *atmp; 13797e5c40aSBarry Smith 1383a40ed3dSBarry Smith PetscFunctionBegin; 1398016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 1409371c9d4SSatish Balay PetscCall(VecDestroy(&baij->lvec)); 1419371c9d4SSatish Balay PetscCall(VecScatterDestroy(&baij->Mvctx)); 142d9653453SSatish Balay if (baij->colmap) { 143aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 144eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap)); 14548e59246SSatish Balay #else 1469566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 14748e59246SSatish Balay #endif 1488016bdd1SSatish Balay } 1498016bdd1SSatish Balay 1508016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1517c612885SStefano Zampini if (B) { 1529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1547c612885SStefano Zampini Bbaij = (Mat_SeqBAIJ *)B->data; 1557c612885SStefano Zampini mbs = Bbaij->mbs; 1567c612885SStefano Zampini a = Bbaij->a; 1578016bdd1SSatish Balay 1588016bdd1SSatish Balay /* invent new B and copy stuff over */ 1599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 160ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 1619566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew)); 1629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 1639566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1649566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 16577341eacSDmitry Karpeev /* 16677341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 16777341eacSDmitry Karpeev Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 16877341eacSDmitry Karpeev */ 16977341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 1707c612885SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 1717c612885SStefano Zampini ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 1727c612885SStefano Zampini } 173d9653453SSatish Balay 1747c612885SStefano Zampini PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE)); 175bba1ac68SSatish Balay for (i = 0; i < mbs; i++) { 176bba1ac68SSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 177bba1ac68SSatish Balay col = garray[Bbaij->j[j]]; 1783eda8832SBarry Smith atmp = a + j * bs2; 1799566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode)); 1808016bdd1SSatish Balay } 1818016bdd1SSatish Balay } 1829566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE)); 183bba1ac68SSatish Balay 1849566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 18626fbe8dcSKarl Rupp 187d9653453SSatish Balay baij->B = Bnew; 1887c612885SStefano Zampini } 1897c612885SStefano Zampini PetscCall(PetscFree(baij->garray)); 1908016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1916a719282SBarry Smith A->assembled = PETSC_FALSE; 1923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1938016bdd1SSatish Balay } 1948016bdd1SSatish Balay 19504f1ad80SBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 19604f1ad80SBarry Smith 197f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 198f4259b30SLisandro Dalcin static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 19904f1ad80SBarry Smith 200*ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 201d71ae5a4SJacob Faibussowitsch { 20204f1ad80SBarry Smith Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */ 20304f1ad80SBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data; 204d0f46423SBarry Smith PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices; 205b24ad042SBarry Smith PetscInt *r_rmapd, *r_rmapo; 20604f1ad80SBarry Smith 20704f1ad80SBarry Smith PetscFunctionBegin; 2089566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2099566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2109566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 21104f1ad80SBarry Smith nt = 0; 21245b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 21345b6f7e9SBarry Smith if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) { 21404f1ad80SBarry Smith nt++; 21545b6f7e9SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 21604f1ad80SBarry Smith } 21704f1ad80SBarry Smith } 21808401ef6SPierre Jolivet PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n); 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &uglyrmapd)); 22045b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 22104f1ad80SBarry Smith if (r_rmapd[i]) { 222ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; 22304f1ad80SBarry Smith } 22404f1ad80SBarry Smith } 2259566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2269566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd)); 22704f1ad80SBarry Smith 2289566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices)); 229ad540459SPierre Jolivet for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1; 23045b6f7e9SBarry Smith no = inA->rmap->mapping->n - nt; 2319566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 23204f1ad80SBarry Smith nt = 0; 23345b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 23445b6f7e9SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 23504f1ad80SBarry Smith nt++; 23645b6f7e9SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 23704f1ad80SBarry Smith } 23804f1ad80SBarry Smith } 23908401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2409566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo)); 24245b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 24304f1ad80SBarry Smith if (r_rmapo[i]) { 244ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; 24504f1ad80SBarry Smith } 24604f1ad80SBarry Smith } 2479566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2489566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo)); 2493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25004f1ad80SBarry Smith } 25104f1ad80SBarry Smith 252d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) 253d71ae5a4SJacob Faibussowitsch { 25404f1ad80SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */ 255b24ad042SBarry Smith PetscInt n, i; 256bca11509SBarry Smith PetscScalar *d, *o; 257bca11509SBarry Smith const PetscScalar *s; 25804f1ad80SBarry Smith 25904f1ad80SBarry Smith PetscFunctionBegin; 26048a46eb9SPierre Jolivet if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); 2612cd6534aSBarry Smith 2629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 26304f1ad80SBarry Smith 2649566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglydd, &n)); 2659566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglydd, &d)); 2669371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglydd, &d)); 26804f1ad80SBarry Smith /* column scale "diagonal" portion of local matrix */ 2699566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, uglydd)); 27004f1ad80SBarry Smith 2719566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglyoo, &n)); 2729566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglyoo, &o)); 2739371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2749566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglyoo, &o)); 27604f1ad80SBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2779566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, uglyoo)); 2783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 27904f1ad80SBarry Smith } 280