1a30f8f8cSSatish Balay 2a30f8f8cSSatish Balay /* 3a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 4a30f8f8cSSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 67cff1425SSatish Balay 79371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) { 840781036SHong Zhang Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data; 940781036SHong Zhang Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(sbaij->B->data); 10a11b0bdaSJunchao Zhang PetscInt Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray; 11d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt; 1240781036SHong Zhang IS from, to; 1340781036SHong Zhang Vec gvec; 14a11b0bdaSJunchao Zhang PetscMPIInt rank = sbaij->rank, lsize; 15077aedafSJed Brown PetscInt *owners = sbaij->rangebs, *ec_owner, k; 16077aedafSJed Brown const PetscInt *sowners; 1740781036SHong Zhang PetscScalar *ptr; 18a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 19a11b0bdaSJunchao Zhang PetscTable gid1_lid1; /* one-based gid to lid table */ 20a11b0bdaSJunchao Zhang PetscTablePosition tpos; 21a11b0bdaSJunchao Zhang PetscInt gid, lid; 22a11b0bdaSJunchao Zhang #else 23a11b0bdaSJunchao Zhang PetscInt *indices; 24a11b0bdaSJunchao Zhang #endif 2540781036SHong Zhang 2640781036SHong Zhang PetscFunctionBegin; 27a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 289566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1)); 29a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 30a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 31a11b0bdaSJunchao Zhang PetscInt data, gid1 = aj[B->i[i] + j] + 1; 329566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 339566063dSJacob Faibussowitsch if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 34a11b0bdaSJunchao Zhang } 35a11b0bdaSJunchao Zhang } 36a11b0bdaSJunchao Zhang /* form array of columns we need */ 379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 389566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 39a11b0bdaSJunchao Zhang while (tpos) { 409566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 419371c9d4SSatish Balay gid--; 429371c9d4SSatish Balay lid--; 43a11b0bdaSJunchao Zhang garray[lid] = gid; 44a11b0bdaSJunchao Zhang } 459566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 469566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 479566063dSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 48a11b0bdaSJunchao Zhang /* compact out the extra columns in B */ 49a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 50a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 51a11b0bdaSJunchao Zhang PetscInt gid1 = aj[B->i[i] + j] + 1; 529566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 53a11b0bdaSJunchao Zhang lid--; 54a11b0bdaSJunchao Zhang aj[B->i[i] + j] = lid; 55a11b0bdaSJunchao Zhang } 56a11b0bdaSJunchao Zhang } 579566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 589566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 59a11b0bdaSJunchao Zhang for (i = j = 0; i < ec; i++) { 60a11b0bdaSJunchao Zhang while (garray[i] >= owners[j + 1]) j++; 61a11b0bdaSJunchao Zhang ec_owner[i] = j; 62a11b0bdaSJunchao Zhang } 63a11b0bdaSJunchao Zhang #else 6440781036SHong Zhang /* For the first stab we make an array as long as the number of columns */ 6540781036SHong Zhang /* mark those columns that are in sbaij->B */ 669566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 6740781036SHong Zhang for (i = 0; i < mbs; i++) { 6840781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) { 6940781036SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 7040781036SHong Zhang indices[aj[B->i[i] + j]] = 1; 7140781036SHong Zhang } 7240781036SHong Zhang } 7340781036SHong Zhang 7440781036SHong Zhang /* form arrays of columns we need */ 759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 769566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 7740781036SHong Zhang 7840781036SHong Zhang ec = 0; 79a11b0bdaSJunchao Zhang for (j = 0; j < sbaij->size; j++) { 8040781036SHong Zhang for (i = owners[j]; i < owners[j + 1]; i++) { 8140781036SHong Zhang if (indices[i]) { 8240781036SHong Zhang garray[ec] = i; 8340781036SHong Zhang ec_owner[ec] = j; 8440781036SHong Zhang ec++; 8540781036SHong Zhang } 8640781036SHong Zhang } 8740781036SHong Zhang } 8840781036SHong Zhang 8940781036SHong Zhang /* make indices now point into garray */ 90b424e231SHong Zhang for (i = 0; i < ec; i++) indices[garray[i]] = i; 9140781036SHong Zhang 9240781036SHong Zhang /* compact out the extra columns in B */ 9340781036SHong Zhang for (i = 0; i < mbs; i++) { 9440781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 9540781036SHong Zhang } 969566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 97a11b0bdaSJunchao Zhang #endif 9840781036SHong Zhang B->nbs = ec; 999566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sbaij->B->cmap)); 1009566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap)); 10140781036SHong Zhang 1029566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sbaij->sMvctx)); 10340781036SHong Zhang /* create local vector that is used to scatter into */ 1049566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec)); 10540781036SHong Zhang 10640781036SHong Zhang /* create two temporary index sets for building scatter-gather */ 1079566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * ec, &stmp)); 1089566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 10926fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1109566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to)); 11140781036SHong Zhang 11293d1592dSHong Zhang /* generate the scatter context 113bd096759SJunchao Zhang -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */ 1149566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1159566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx)); 1169566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 11740781036SHong Zhang 11840781036SHong Zhang sbaij->garray = garray; 11926fbe8dcSKarl Rupp 1209566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->Mvctx)); 1219566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->lvec)); 1229566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from)); 1239566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to)); 12440781036SHong Zhang 1259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1269566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 12740781036SHong Zhang 12840781036SHong Zhang /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 12940781036SHong Zhang lsize = (mbs + ec) * bs; 1309566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0)); 1319566063dSJacob Faibussowitsch PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1)); 1329566063dSJacob Faibussowitsch PetscCall(VecGetSize(sbaij->slvec0, &vec_size)); 13340781036SHong Zhang 1349566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners)); 13540781036SHong Zhang 13640781036SHong Zhang /* x index in the IS sfrom */ 13740781036SHong Zhang for (i = 0; i < ec; i++) { 13840781036SHong Zhang j = ec_owner[i]; 13940781036SHong Zhang sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]); 14040781036SHong Zhang } 14140781036SHong Zhang /* b index in the IS sfrom */ 14240781036SHong Zhang k = sowners[rank] / bs + mbs; 14340781036SHong Zhang for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j; 1449566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from)); 14540781036SHong Zhang 14640781036SHong Zhang /* x index in the IS sto */ 14740781036SHong Zhang k = sowners[rank] / bs + mbs; 148e82e9f6bSBarry Smith for (i = 0; i < ec; i++) stmp[i] = (k + i); 14940781036SHong Zhang /* b index in the IS sto */ 150e82e9f6bSBarry Smith for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec]; 15140781036SHong Zhang 1529566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to)); 15340781036SHong Zhang 1549566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx)); 1559566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 15640781036SHong Zhang 1579566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(sbaij->slvec1, &nt)); 1589566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec1, &ptr)); 1599566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a)); 1609566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b)); 1619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec1, &ptr)); 16240781036SHong Zhang 1639566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec0, &ptr)); 1649566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b)); 1659566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec0, &ptr)); 16640781036SHong Zhang 1679566063dSJacob Faibussowitsch PetscCall(PetscFree(stmp)); 16840781036SHong Zhang 1699566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->sMvctx)); 1709566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0)); 1719566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1)); 1729566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0b)); 1739566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1a)); 1749566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1b)); 1759566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from)); 1769566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to)); 17740781036SHong Zhang 1789566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt))); 1799566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1809566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1819566063dSJacob Faibussowitsch PetscCall(PetscFree2(sgarray, ec_owner)); 18240781036SHong Zhang PetscFunctionReturn(0); 18340781036SHong Zhang } 18440781036SHong Zhang 185a30f8f8cSSatish Balay /* 18601b2bd88SHong Zhang Takes the local part of an already assembled MPISBAIJ matrix 187a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 188a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 189a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 190a30f8f8cSSatish Balay 191a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 192a30f8f8cSSatish Balay they are sloppy. 193a30f8f8cSSatish Balay */ 1949371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) { 1952896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data; 196a30f8f8cSSatish Balay Mat B = baij->B, Bnew; 197a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 198d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 199d0f46423SBarry Smith PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, ec, m = A->rmap->n; 200a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 20187828ca2SBarry Smith PetscScalar *atmp; 202ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 20313f74950SBarry Smith PetscInt l; 204a30f8f8cSSatish Balay #endif 205a30f8f8cSSatish Balay 206a30f8f8cSSatish Balay PetscFunctionBegin; 207ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 2089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->bs, &atmp)); 20982502324SSatish Balay #endif 210a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 2119566063dSJacob Faibussowitsch PetscCall(VecGetSize(baij->lvec, &ec)); /* needed for PetscLogObjectMemory below */ 2129566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 2139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->Mvctx)); 21401b2bd88SHong Zhang 2159566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0)); 2169566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0b)); 2179566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1)); 2189566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1a)); 2199566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1b)); 22001b2bd88SHong Zhang 221a30f8f8cSSatish Balay if (baij->colmap) { 222a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2239566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&baij->colmap)); 224a30f8f8cSSatish Balay #else 2259566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 2269566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -Bbaij->nbs * sizeof(PetscInt))); 227a30f8f8cSSatish Balay #endif 228a30f8f8cSSatish Balay } 229a30f8f8cSSatish Balay 230a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 2319566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2329566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 233a30f8f8cSSatish Balay 234a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 2359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 236*ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 2379566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 2389566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 2399566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 2409566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 2419566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 242a30f8f8cSSatish Balay 243b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 244b38c15b3SStefano Zampini ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 245b38c15b3SStefano Zampini } 246b38c15b3SStefano Zampini 24777341eacSDmitry Karpeev /* 24877341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 24977341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 25077341eacSDmitry Karpeev */ 25177341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 2529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 253a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 254a30f8f8cSSatish Balay rvals[0] = bs * i; 25526fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 256a30f8f8cSSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 257a30f8f8cSSatish Balay col = garray[Bbaij->j[j]] * bs; 258a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 259ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 260a30f8f8cSSatish Balay for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l]; 261a30f8f8cSSatish Balay #else 26271730473SSatish Balay atmp = a + j * bs2 + k * bs; 263a30f8f8cSSatish Balay #endif 2649566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode)); 265a30f8f8cSSatish Balay col++; 266a30f8f8cSSatish Balay } 267a30f8f8cSSatish Balay } 268a30f8f8cSSatish Balay } 269ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 2709566063dSJacob Faibussowitsch PetscCall(PetscFree(atmp)); 271a30f8f8cSSatish Balay #endif 2729566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 27326fbe8dcSKarl Rupp 274f4259b30SLisandro Dalcin baij->garray = NULL; 27526fbe8dcSKarl Rupp 2769566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 2779566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt))); 2789566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 2799566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew)); 28026fbe8dcSKarl Rupp 281a30f8f8cSSatish Balay baij->B = Bnew; 28226fbe8dcSKarl Rupp 283a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 284a30f8f8cSSatish Balay PetscFunctionReturn(0); 285a30f8f8cSSatish Balay } 286