1a30f8f8cSSatish Balay /* 2a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 3a30f8f8cSSatish Balay */ 4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 57cff1425SSatish Balay 6d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 7d71ae5a4SJacob Faibussowitsch { 840781036SHong Zhang Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data; 9*f4f49eeaSPierre Jolivet Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)sbaij->B->data; 10eec179cfSJacob Faibussowitsch PetscInt i, j, *aj = B->j, ec = 0, *garray, *sgarray; 11d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt; 1240781036SHong Zhang IS from, to; 1340781036SHong Zhang Vec gvec; 14a11b0bdaSJunchao Zhang PetscMPIInt rank = sbaij->rank, lsize; 15077aedafSJed Brown PetscInt *owners = sbaij->rangebs, *ec_owner, k; 16077aedafSJed Brown const PetscInt *sowners; 1740781036SHong Zhang PetscScalar *ptr; 18a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 19eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; /* one-based gid to lid table */ 20eec179cfSJacob Faibussowitsch PetscHashIter tpos; 21a11b0bdaSJunchao Zhang PetscInt gid, lid; 22a11b0bdaSJunchao Zhang #else 23eec179cfSJacob Faibussowitsch PetscInt Nbs = sbaij->Nbs; 24a11b0bdaSJunchao Zhang PetscInt *indices; 25a11b0bdaSJunchao Zhang #endif 2640781036SHong Zhang 2740781036SHong Zhang PetscFunctionBegin; 28a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 29eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1)); 30a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 31a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 32a11b0bdaSJunchao Zhang PetscInt data, gid1 = aj[B->i[i] + j] + 1; 33eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 34c76ffc5fSJacob Faibussowitsch if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec)); 35a11b0bdaSJunchao Zhang } 36a11b0bdaSJunchao Zhang } 37a11b0bdaSJunchao Zhang /* form array of columns we need */ 389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 39eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 40eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 41eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 42eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 43eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 449371c9d4SSatish Balay gid--; 459371c9d4SSatish Balay lid--; 46a11b0bdaSJunchao Zhang garray[lid] = gid; 47a11b0bdaSJunchao Zhang } 489566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 49eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 50c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1)); 51a11b0bdaSJunchao Zhang /* compact out the extra columns in B */ 52a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 53a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 54a11b0bdaSJunchao Zhang PetscInt gid1 = aj[B->i[i] + j] + 1; 55eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 56a11b0bdaSJunchao Zhang lid--; 57a11b0bdaSJunchao Zhang aj[B->i[i] + j] = lid; 58a11b0bdaSJunchao Zhang } 59a11b0bdaSJunchao Zhang } 60eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 619566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 62a11b0bdaSJunchao Zhang for (i = j = 0; i < ec; i++) { 63a11b0bdaSJunchao Zhang while (garray[i] >= owners[j + 1]) j++; 64a11b0bdaSJunchao Zhang ec_owner[i] = j; 65a11b0bdaSJunchao Zhang } 66a11b0bdaSJunchao Zhang #else 6740781036SHong Zhang /* For the first stab we make an array as long as the number of columns */ 6840781036SHong Zhang /* mark those columns that are in sbaij->B */ 699566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 7040781036SHong Zhang for (i = 0; i < mbs; i++) { 7140781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) { 7240781036SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 7340781036SHong Zhang indices[aj[B->i[i] + j]] = 1; 7440781036SHong Zhang } 7540781036SHong Zhang } 7640781036SHong Zhang 7740781036SHong Zhang /* form arrays of columns we need */ 789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 799566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 8040781036SHong Zhang 8140781036SHong Zhang ec = 0; 82a11b0bdaSJunchao Zhang for (j = 0; j < sbaij->size; j++) { 8340781036SHong Zhang for (i = owners[j]; i < owners[j + 1]; i++) { 8440781036SHong Zhang if (indices[i]) { 8540781036SHong Zhang garray[ec] = i; 8640781036SHong Zhang ec_owner[ec] = j; 8740781036SHong Zhang ec++; 8840781036SHong Zhang } 8940781036SHong Zhang } 9040781036SHong Zhang } 9140781036SHong Zhang 9240781036SHong Zhang /* make indices now point into garray */ 93b424e231SHong Zhang for (i = 0; i < ec; i++) indices[garray[i]] = i; 9440781036SHong Zhang 9540781036SHong Zhang /* compact out the extra columns in B */ 9640781036SHong Zhang for (i = 0; i < mbs; i++) { 9740781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 9840781036SHong Zhang } 999566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 100a11b0bdaSJunchao Zhang #endif 10140781036SHong Zhang B->nbs = ec; 1029566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sbaij->B->cmap)); 1039566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap)); 10440781036SHong Zhang 1059566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sbaij->sMvctx)); 10640781036SHong Zhang /* create local vector that is used to scatter into */ 1079566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec)); 10840781036SHong Zhang 10940781036SHong Zhang /* create two temporary index sets for building scatter-gather */ 1109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * ec, &stmp)); 1119566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 11226fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1139566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to)); 11440781036SHong Zhang 11593d1592dSHong Zhang /* generate the scatter context 116bd096759SJunchao Zhang -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */ 1179566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1189566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx)); 1199566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 12040781036SHong Zhang 12140781036SHong Zhang sbaij->garray = garray; 12226fbe8dcSKarl Rupp 1239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 12540781036SHong Zhang 12640781036SHong Zhang /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 12740781036SHong Zhang lsize = (mbs + ec) * bs; 1289566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0)); 1299566063dSJacob Faibussowitsch PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1)); 1309566063dSJacob Faibussowitsch PetscCall(VecGetSize(sbaij->slvec0, &vec_size)); 13140781036SHong Zhang 1329566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners)); 13340781036SHong Zhang 13440781036SHong Zhang /* x index in the IS sfrom */ 13540781036SHong Zhang for (i = 0; i < ec; i++) { 13640781036SHong Zhang j = ec_owner[i]; 13740781036SHong Zhang sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]); 13840781036SHong Zhang } 13940781036SHong Zhang /* b index in the IS sfrom */ 14040781036SHong Zhang k = sowners[rank] / bs + mbs; 14140781036SHong Zhang for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j; 1429566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from)); 14340781036SHong Zhang 14440781036SHong Zhang /* x index in the IS sto */ 14540781036SHong Zhang k = sowners[rank] / bs + mbs; 146e82e9f6bSBarry Smith for (i = 0; i < ec; i++) stmp[i] = (k + i); 14740781036SHong Zhang /* b index in the IS sto */ 148e82e9f6bSBarry Smith for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec]; 14940781036SHong Zhang 1509566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to)); 15140781036SHong Zhang 1529566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx)); 1539566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 15440781036SHong Zhang 1559566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(sbaij->slvec1, &nt)); 1569566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec1, &ptr)); 1579566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a)); 1588e3a54c0SPierre Jolivet PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b)); 1599566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec1, &ptr)); 16040781036SHong Zhang 1619566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec0, &ptr)); 1628e3a54c0SPierre Jolivet PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b)); 1639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec0, &ptr)); 16440781036SHong Zhang 1659566063dSJacob Faibussowitsch PetscCall(PetscFree(stmp)); 16640781036SHong Zhang 1679566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1689566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1699566063dSJacob Faibussowitsch PetscCall(PetscFree2(sgarray, ec_owner)); 1703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 17140781036SHong Zhang } 17240781036SHong Zhang 173a30f8f8cSSatish Balay /* 17401b2bd88SHong Zhang Takes the local part of an already assembled MPISBAIJ matrix 175a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 176a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 177a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 178a30f8f8cSSatish Balay 179a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 180a30f8f8cSSatish Balay they are sloppy. 181a30f8f8cSSatish Balay */ 182d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 183d71ae5a4SJacob Faibussowitsch { 1842896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data; 185a30f8f8cSSatish Balay Mat B = baij->B, Bnew; 186a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 187d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 1884dfa11a4SJacob Faibussowitsch PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n; 189a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 19087828ca2SBarry Smith PetscScalar *atmp; 191ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 19213f74950SBarry Smith PetscInt l; 193a30f8f8cSSatish Balay #endif 194a30f8f8cSSatish Balay 195a30f8f8cSSatish Balay PetscFunctionBegin; 196ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 1979566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->bs, &atmp)); 19882502324SSatish Balay #endif 199a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 2009566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 2019566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->Mvctx)); 20201b2bd88SHong Zhang 2039566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0)); 2049566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0b)); 2059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1)); 2069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1a)); 2079566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1b)); 20801b2bd88SHong Zhang 209a30f8f8cSSatish Balay if (baij->colmap) { 210a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 211eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&baij->colmap)); 212a30f8f8cSSatish Balay #else 2139566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 214a30f8f8cSSatish Balay #endif 215a30f8f8cSSatish Balay } 216a30f8f8cSSatish Balay 217a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 2189566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2199566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 220a30f8f8cSSatish Balay 221a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 223ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 2249566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 2259566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 2269566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 2279566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 2289566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 229a30f8f8cSSatish Balay 230b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 231b38c15b3SStefano Zampini ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 232b38c15b3SStefano Zampini } 233b38c15b3SStefano Zampini 23477341eacSDmitry Karpeev /* 23577341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 23677341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 23777341eacSDmitry Karpeev */ 23877341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 2399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 240a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 241a30f8f8cSSatish Balay rvals[0] = bs * i; 24226fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 243a30f8f8cSSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 244a30f8f8cSSatish Balay col = garray[Bbaij->j[j]] * bs; 245a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 246ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 247a30f8f8cSSatish Balay for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l]; 248a30f8f8cSSatish Balay #else 24971730473SSatish Balay atmp = a + j * bs2 + k * bs; 250a30f8f8cSSatish Balay #endif 2519566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode)); 252a30f8f8cSSatish Balay col++; 253a30f8f8cSSatish Balay } 254a30f8f8cSSatish Balay } 255a30f8f8cSSatish Balay } 256ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 2579566063dSJacob Faibussowitsch PetscCall(PetscFree(atmp)); 258a30f8f8cSSatish Balay #endif 2599566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 26026fbe8dcSKarl Rupp 261f4259b30SLisandro Dalcin baij->garray = NULL; 26226fbe8dcSKarl Rupp 2639566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 2649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 26526fbe8dcSKarl Rupp 266a30f8f8cSSatish Balay baij->B = Bnew; 26726fbe8dcSKarl Rupp 268a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 2693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 270a30f8f8cSSatish Balay } 271