1be1d678aSKris Buschelman 28016bdd1SSatish Balay /* 3d9653453SSatish Balay Support for the parallel BAIJ matrix vector multiply 48016bdd1SSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> 6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7bba1ac68SSatish Balay 8*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9*d71ae5a4SJacob Faibussowitsch { 10d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 11d9653453SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(baij->B->data); 12b24ad042SBarry Smith PetscInt i, j, *aj = B->j, ec = 0, *garray; 13d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp; 148016bdd1SSatish Balay IS from, to; 158016bdd1SSatish Balay Vec gvec; 16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 170f5bd95cSBarry Smith PetscTable gid1_lid1; 180f5bd95cSBarry Smith PetscTablePosition tpos; 19b24ad042SBarry Smith PetscInt gid, lid; 206f531f54SSatish Balay #else 21b24ad042SBarry Smith PetscInt Nbs = baij->Nbs, *indices; 2273a2e727SSatish Balay #endif 238016bdd1SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 25aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2673a2e727SSatish Balay /* use a table - Mark Adams */ 279566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1)); 2873a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 2973a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 30b24ad042SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 319566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 32fa46199cSSatish Balay if (!data) { 3373a2e727SSatish Balay /* one based table */ 349566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 3573a2e727SSatish Balay } 3673a2e727SSatish Balay } 3773a2e727SSatish Balay } 3873a2e727SSatish Balay /* form array of columns we need */ 399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 409566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 4173a2e727SSatish Balay while (tpos) { 429566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 439371c9d4SSatish Balay gid--; 449371c9d4SSatish Balay lid--; 4573a2e727SSatish Balay garray[lid] = gid; 4673a2e727SSatish Balay } 479566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 489566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 4948a46eb9SPierre Jolivet for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 5073a2e727SSatish Balay /* compact out the extra columns in B */ 5173a2e727SSatish Balay for (i = 0; i < B->mbs; i++) { 5273a2e727SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 53b24ad042SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 549566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 55fa46199cSSatish Balay lid--; 5673a2e727SSatish Balay aj[B->i[i] + j] = lid; 5773a2e727SSatish Balay } 5873a2e727SSatish Balay } 5973a2e727SSatish Balay B->nbs = ec; 609566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 619566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 629566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 6373a2e727SSatish Balay #else 64435da068SBarry Smith /* Make an array as long as the number of columns */ 65d9653453SSatish Balay /* mark those columns that are in baij->B */ 669566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 67d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 688016bdd1SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 69d9653453SSatish Balay if (!indices[aj[B->i[i] + j]]) ec++; 70d9653453SSatish Balay indices[aj[B->i[i] + j]] = 1; 718016bdd1SSatish Balay } 728016bdd1SSatish Balay } 738016bdd1SSatish Balay 748016bdd1SSatish Balay /* form array of columns we need */ 759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 768016bdd1SSatish Balay ec = 0; 77d9653453SSatish Balay for (i = 0; i < Nbs; i++) { 78ad540459SPierre Jolivet if (indices[i]) garray[ec++] = i; 798016bdd1SSatish Balay } 808016bdd1SSatish Balay 818016bdd1SSatish Balay /* make indices now point into garray */ 82ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 838016bdd1SSatish Balay 848016bdd1SSatish Balay /* compact out the extra columns in B */ 85d9653453SSatish Balay for (i = 0; i < B->mbs; i++) { 86ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 878016bdd1SSatish Balay } 88d9653453SSatish Balay B->nbs = ec; 899566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 909566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 919566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 9273a2e727SSatish Balay #endif 938016bdd1SSatish Balay 948016bdd1SSatish Balay /* create local vector that is used to scatter into */ 959566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec)); 968016bdd1SSatish Balay 97c16cb8f2SBarry Smith /* create two temporary index sets for building scatter-gather */ 989566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 99c16cb8f2SBarry Smith 1009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &stmp)); 10126fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1029566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to)); 1038016bdd1SSatish Balay 1048016bdd1SSatish Balay /* create temporary global vector to generate scatter context */ 1059566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1068016bdd1SSatish Balay 1079566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx)); 1089566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 10990f02eecSBarry Smith 110d9653453SSatish Balay baij->garray = garray; 11126fbe8dcSKarl Rupp 1129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1139566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1149566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1153a40ed3dSBarry Smith PetscFunctionReturn(0); 1168016bdd1SSatish Balay } 1178016bdd1SSatish Balay 1188016bdd1SSatish Balay /* 119d9653453SSatish Balay Takes the local part of an already assembled MPIBAIJ matrix 1208016bdd1SSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 1218016bdd1SSatish Balay that require more communication in the matrix vector multiply. 1228016bdd1SSatish Balay Thus certain data-structures must be rebuilt. 1238016bdd1SSatish Balay 1248016bdd1SSatish Balay Kind of slow! But that's what application programmers get when 1258016bdd1SSatish Balay they are sloppy. 1268016bdd1SSatish Balay */ 127*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 128*d71ae5a4SJacob Faibussowitsch { 129d9653453SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 130d9653453SSatish Balay Mat B = baij->B, Bnew; 131d9653453SSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 132d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 1334dfa11a4SJacob Faibussowitsch PetscInt bs2 = baij->bs2, *nz, m = A->rmap->n; 1343eda8832SBarry Smith MatScalar *a = Bbaij->a; 135dd6ea824SBarry Smith MatScalar *atmp; 13697e5c40aSBarry Smith 1373a40ed3dSBarry Smith PetscFunctionBegin; 1388016bdd1SSatish Balay /* free stuff related to matrix-vec multiply */ 1399371c9d4SSatish Balay PetscCall(VecDestroy(&baij->lvec)); 1409371c9d4SSatish Balay baij->lvec = NULL; 1419371c9d4SSatish Balay PetscCall(VecScatterDestroy(&baij->Mvctx)); 1429371c9d4SSatish Balay baij->Mvctx = NULL; 143d9653453SSatish Balay if (baij->colmap) { 144aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 1459566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&baij->colmap)); 14648e59246SSatish Balay #else 1479566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 14848e59246SSatish Balay #endif 1498016bdd1SSatish Balay } 1508016bdd1SSatish Balay 1518016bdd1SSatish Balay /* make sure that B is assembled so we can access its values */ 1529566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1539566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1548016bdd1SSatish Balay 1558016bdd1SSatish Balay /* invent new B and copy stuff over */ 1569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 157ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 1589566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew)); 1599566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 1609566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1619566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 162b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 163b38c15b3SStefano Zampini ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 164b38c15b3SStefano Zampini } 16526fbe8dcSKarl Rupp 1669566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE)); 16777341eacSDmitry Karpeev /* 16877341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 16977341eacSDmitry Karpeev Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17077341eacSDmitry Karpeev */ 17177341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 172d9653453SSatish Balay 173bba1ac68SSatish Balay for (i = 0; i < mbs; i++) { 174bba1ac68SSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 175bba1ac68SSatish Balay col = garray[Bbaij->j[j]]; 1763eda8832SBarry Smith atmp = a + j * bs2; 1779566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode)); 1788016bdd1SSatish Balay } 1798016bdd1SSatish Balay } 1809566063dSJacob Faibussowitsch PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE)); 181bba1ac68SSatish Balay 1829566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1839566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 1849566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 18526fbe8dcSKarl Rupp 186d9653453SSatish Balay baij->B = Bnew; 1878016bdd1SSatish Balay A->was_assembled = PETSC_FALSE; 1886a719282SBarry Smith A->assembled = PETSC_FALSE; 1893a40ed3dSBarry Smith PetscFunctionReturn(0); 1908016bdd1SSatish Balay } 1918016bdd1SSatish Balay 19204f1ad80SBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 19304f1ad80SBarry Smith 194f4259b30SLisandro Dalcin static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 195f4259b30SLisandro Dalcin static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 19604f1ad80SBarry Smith 197*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 198*d71ae5a4SJacob Faibussowitsch { 19904f1ad80SBarry Smith Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */ 20004f1ad80SBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data; 201d0f46423SBarry Smith PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices; 202b24ad042SBarry Smith PetscInt *r_rmapd, *r_rmapo; 20304f1ad80SBarry Smith 20404f1ad80SBarry Smith PetscFunctionBegin; 2059566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2069566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2079566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 20804f1ad80SBarry Smith nt = 0; 20945b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 21045b6f7e9SBarry Smith if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) { 21104f1ad80SBarry Smith nt++; 21245b6f7e9SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 21304f1ad80SBarry Smith } 21404f1ad80SBarry Smith } 21508401ef6SPierre Jolivet PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n); 2169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &uglyrmapd)); 21745b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 21804f1ad80SBarry Smith if (r_rmapd[i]) { 219ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; 22004f1ad80SBarry Smith } 22104f1ad80SBarry Smith } 2229566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2239566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd)); 22404f1ad80SBarry Smith 2259566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices)); 226ad540459SPierre Jolivet for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1; 22745b6f7e9SBarry Smith no = inA->rmap->mapping->n - nt; 2289566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 22904f1ad80SBarry Smith nt = 0; 23045b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 23145b6f7e9SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 23204f1ad80SBarry Smith nt++; 23345b6f7e9SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 23404f1ad80SBarry Smith } 23504f1ad80SBarry Smith } 23608401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2379566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo)); 23945b6f7e9SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 24004f1ad80SBarry Smith if (r_rmapo[i]) { 241ad540459SPierre Jolivet for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; 24204f1ad80SBarry Smith } 24304f1ad80SBarry Smith } 2449566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2459566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo)); 24604f1ad80SBarry Smith PetscFunctionReturn(0); 24704f1ad80SBarry Smith } 24804f1ad80SBarry Smith 249*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) 250*d71ae5a4SJacob Faibussowitsch { 25192b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 25292b32695SKris Buschelman 25392b32695SKris Buschelman PetscFunctionBegin; 254cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 25592b32695SKris Buschelman PetscFunctionReturn(0); 25692b32695SKris Buschelman } 25792b32695SKris Buschelman 258*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) 259*d71ae5a4SJacob Faibussowitsch { 26004f1ad80SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */ 261b24ad042SBarry Smith PetscInt n, i; 262bca11509SBarry Smith PetscScalar *d, *o; 263bca11509SBarry Smith const PetscScalar *s; 26404f1ad80SBarry Smith 26504f1ad80SBarry Smith PetscFunctionBegin; 26648a46eb9SPierre Jolivet if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); 2672cd6534aSBarry Smith 2689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 26904f1ad80SBarry Smith 2709566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglydd, &n)); 2719566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglydd, &d)); 2729371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglydd, &d)); 27404f1ad80SBarry Smith /* column scale "diagonal" portion of local matrix */ 2759566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, uglydd)); 27604f1ad80SBarry Smith 2779566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(uglyoo, &n)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetArray(uglyoo, &o)); 2799371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(uglyoo, &o)); 28204f1ad80SBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2839566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, uglyoo)); 28404f1ad80SBarry Smith PetscFunctionReturn(0); 28504f1ad80SBarry Smith } 286