1a30f8f8cSSatish Balay 2a30f8f8cSSatish Balay /* 3a30f8f8cSSatish Balay Support for the parallel SBAIJ matrix vector multiply 4a30f8f8cSSatish Balay */ 5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 67cff1425SSatish Balay 7*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8*d71ae5a4SJacob Faibussowitsch { 940781036SHong Zhang Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data; 1040781036SHong Zhang Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(sbaij->B->data); 11a11b0bdaSJunchao Zhang PetscInt Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray; 12d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt; 1340781036SHong Zhang IS from, to; 1440781036SHong Zhang Vec gvec; 15a11b0bdaSJunchao Zhang PetscMPIInt rank = sbaij->rank, lsize; 16077aedafSJed Brown PetscInt *owners = sbaij->rangebs, *ec_owner, k; 17077aedafSJed Brown const PetscInt *sowners; 1840781036SHong Zhang PetscScalar *ptr; 19a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 20a11b0bdaSJunchao Zhang PetscTable gid1_lid1; /* one-based gid to lid table */ 21a11b0bdaSJunchao Zhang PetscTablePosition tpos; 22a11b0bdaSJunchao Zhang PetscInt gid, lid; 23a11b0bdaSJunchao Zhang #else 24a11b0bdaSJunchao Zhang PetscInt *indices; 25a11b0bdaSJunchao Zhang #endif 2640781036SHong Zhang 2740781036SHong Zhang PetscFunctionBegin; 28a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE) 299566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1)); 30a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 31a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 32a11b0bdaSJunchao Zhang PetscInt data, gid1 = aj[B->i[i] + j] + 1; 339566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 349566063dSJacob Faibussowitsch if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 35a11b0bdaSJunchao Zhang } 36a11b0bdaSJunchao Zhang } 37a11b0bdaSJunchao Zhang /* form array of columns we need */ 389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 399566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 40a11b0bdaSJunchao Zhang while (tpos) { 419566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 429371c9d4SSatish Balay gid--; 439371c9d4SSatish Balay lid--; 44a11b0bdaSJunchao Zhang garray[lid] = gid; 45a11b0bdaSJunchao Zhang } 469566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); 479566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 489566063dSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 49a11b0bdaSJunchao Zhang /* compact out the extra columns in B */ 50a11b0bdaSJunchao Zhang for (i = 0; i < B->mbs; i++) { 51a11b0bdaSJunchao Zhang for (j = 0; j < B->ilen[i]; j++) { 52a11b0bdaSJunchao Zhang PetscInt gid1 = aj[B->i[i] + j] + 1; 539566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 54a11b0bdaSJunchao Zhang lid--; 55a11b0bdaSJunchao Zhang aj[B->i[i] + j] = lid; 56a11b0bdaSJunchao Zhang } 57a11b0bdaSJunchao Zhang } 589566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 599566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 60a11b0bdaSJunchao Zhang for (i = j = 0; i < ec; i++) { 61a11b0bdaSJunchao Zhang while (garray[i] >= owners[j + 1]) j++; 62a11b0bdaSJunchao Zhang ec_owner[i] = j; 63a11b0bdaSJunchao Zhang } 64a11b0bdaSJunchao Zhang #else 6540781036SHong Zhang /* For the first stab we make an array as long as the number of columns */ 6640781036SHong Zhang /* mark those columns that are in sbaij->B */ 679566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(Nbs, &indices)); 6840781036SHong Zhang for (i = 0; i < mbs; i++) { 6940781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) { 7040781036SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 7140781036SHong Zhang indices[aj[B->i[i] + j]] = 1; 7240781036SHong Zhang } 7340781036SHong Zhang } 7440781036SHong Zhang 7540781036SHong Zhang /* form arrays of columns we need */ 769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 7840781036SHong Zhang 7940781036SHong Zhang ec = 0; 80a11b0bdaSJunchao Zhang for (j = 0; j < sbaij->size; j++) { 8140781036SHong Zhang for (i = owners[j]; i < owners[j + 1]; i++) { 8240781036SHong Zhang if (indices[i]) { 8340781036SHong Zhang garray[ec] = i; 8440781036SHong Zhang ec_owner[ec] = j; 8540781036SHong Zhang ec++; 8640781036SHong Zhang } 8740781036SHong Zhang } 8840781036SHong Zhang } 8940781036SHong Zhang 9040781036SHong Zhang /* make indices now point into garray */ 91b424e231SHong Zhang for (i = 0; i < ec; i++) indices[garray[i]] = i; 9240781036SHong Zhang 9340781036SHong Zhang /* compact out the extra columns in B */ 9440781036SHong Zhang for (i = 0; i < mbs; i++) { 9540781036SHong Zhang for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 9640781036SHong Zhang } 979566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 98a11b0bdaSJunchao Zhang #endif 9940781036SHong Zhang B->nbs = ec; 1009566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sbaij->B->cmap)); 1019566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap)); 10240781036SHong Zhang 1039566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sbaij->sMvctx)); 10440781036SHong Zhang /* create local vector that is used to scatter into */ 1059566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec)); 10640781036SHong Zhang 10740781036SHong Zhang /* create two temporary index sets for building scatter-gather */ 1089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * ec, &stmp)); 1099566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 11026fbe8dcSKarl Rupp for (i = 0; i < ec; i++) stmp[i] = i; 1119566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to)); 11240781036SHong Zhang 11393d1592dSHong Zhang /* generate the scatter context 114bd096759SJunchao Zhang -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */ 1159566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1169566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx)); 1179566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 11840781036SHong Zhang 11940781036SHong Zhang sbaij->garray = garray; 12026fbe8dcSKarl Rupp 1219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1229566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 12340781036SHong Zhang 12440781036SHong Zhang /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 12540781036SHong Zhang lsize = (mbs + ec) * bs; 1269566063dSJacob Faibussowitsch PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0)); 1279566063dSJacob Faibussowitsch PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1)); 1289566063dSJacob Faibussowitsch PetscCall(VecGetSize(sbaij->slvec0, &vec_size)); 12940781036SHong Zhang 1309566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners)); 13140781036SHong Zhang 13240781036SHong Zhang /* x index in the IS sfrom */ 13340781036SHong Zhang for (i = 0; i < ec; i++) { 13440781036SHong Zhang j = ec_owner[i]; 13540781036SHong Zhang sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]); 13640781036SHong Zhang } 13740781036SHong Zhang /* b index in the IS sfrom */ 13840781036SHong Zhang k = sowners[rank] / bs + mbs; 13940781036SHong Zhang for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j; 1409566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from)); 14140781036SHong Zhang 14240781036SHong Zhang /* x index in the IS sto */ 14340781036SHong Zhang k = sowners[rank] / bs + mbs; 144e82e9f6bSBarry Smith for (i = 0; i < ec; i++) stmp[i] = (k + i); 14540781036SHong Zhang /* b index in the IS sto */ 146e82e9f6bSBarry Smith for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec]; 14740781036SHong Zhang 1489566063dSJacob Faibussowitsch PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to)); 14940781036SHong Zhang 1509566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx)); 1519566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 15240781036SHong Zhang 1539566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(sbaij->slvec1, &nt)); 1549566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec1, &ptr)); 1559566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a)); 1569566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b)); 1579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec1, &ptr)); 15840781036SHong Zhang 1599566063dSJacob Faibussowitsch PetscCall(VecGetArray(sbaij->slvec0, &ptr)); 1609566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b)); 1619566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(sbaij->slvec0, &ptr)); 16240781036SHong Zhang 1639566063dSJacob Faibussowitsch PetscCall(PetscFree(stmp)); 16440781036SHong Zhang 1659566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1679566063dSJacob Faibussowitsch PetscCall(PetscFree2(sgarray, ec_owner)); 16840781036SHong Zhang PetscFunctionReturn(0); 16940781036SHong Zhang } 17040781036SHong Zhang 171a30f8f8cSSatish Balay /* 17201b2bd88SHong Zhang Takes the local part of an already assembled MPISBAIJ matrix 173a30f8f8cSSatish Balay and disassembles it. This is to allow new nonzeros into the matrix 174a30f8f8cSSatish Balay that require more communication in the matrix vector multiply. 175a30f8f8cSSatish Balay Thus certain data-structures must be rebuilt. 176a30f8f8cSSatish Balay 177a30f8f8cSSatish Balay Kind of slow! But that's what application programmers get when 178a30f8f8cSSatish Balay they are sloppy. 179a30f8f8cSSatish Balay */ 180*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 181*d71ae5a4SJacob Faibussowitsch { 1822896befeSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data; 183a30f8f8cSSatish Balay Mat B = baij->B, Bnew; 184a30f8f8cSSatish Balay Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 185d0f46423SBarry Smith PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 1864dfa11a4SJacob Faibussowitsch PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n; 187a30f8f8cSSatish Balay MatScalar *a = Bbaij->a; 18887828ca2SBarry Smith PetscScalar *atmp; 189ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 19013f74950SBarry Smith PetscInt l; 191a30f8f8cSSatish Balay #endif 192a30f8f8cSSatish Balay 193a30f8f8cSSatish Balay PetscFunctionBegin; 194ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 1959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->bs, &atmp)); 19682502324SSatish Balay #endif 197a30f8f8cSSatish Balay /* free stuff related to matrix-vec multiply */ 1989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 1999566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&baij->Mvctx)); 20001b2bd88SHong Zhang 2019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0)); 2029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec0b)); 2039566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1)); 2049566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1a)); 2059566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->slvec1b)); 20601b2bd88SHong Zhang 207a30f8f8cSSatish Balay if (baij->colmap) { 208a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE) 2099566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&baij->colmap)); 210a30f8f8cSSatish Balay #else 2119566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 212a30f8f8cSSatish Balay #endif 213a30f8f8cSSatish Balay } 214a30f8f8cSSatish Balay 215a30f8f8cSSatish Balay /* make sure that B is assembled so we can access its values */ 2169566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2179566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 218a30f8f8cSSatish Balay 219a30f8f8cSSatish Balay /* invent new B and copy stuff over */ 2209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs, &nz)); 221ad540459SPierre Jolivet for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 2229566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 2239566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); 2249566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 2259566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 2269566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 227a30f8f8cSSatish Balay 228b38c15b3SStefano Zampini if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 229b38c15b3SStefano Zampini ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 230b38c15b3SStefano Zampini } 231b38c15b3SStefano Zampini 23277341eacSDmitry Karpeev /* 23377341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 23477341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 23577341eacSDmitry Karpeev */ 23677341eacSDmitry Karpeev Bnew->nonzerostate = B->nonzerostate; 2379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 238a30f8f8cSSatish Balay for (i = 0; i < mbs; i++) { 239a30f8f8cSSatish Balay rvals[0] = bs * i; 24026fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 241a30f8f8cSSatish Balay for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 242a30f8f8cSSatish Balay col = garray[Bbaij->j[j]] * bs; 243a30f8f8cSSatish Balay for (k = 0; k < bs; k++) { 244ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 245a30f8f8cSSatish Balay for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l]; 246a30f8f8cSSatish Balay #else 24771730473SSatish Balay atmp = a + j * bs2 + k * bs; 248a30f8f8cSSatish Balay #endif 2499566063dSJacob Faibussowitsch PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode)); 250a30f8f8cSSatish Balay col++; 251a30f8f8cSSatish Balay } 252a30f8f8cSSatish Balay } 253a30f8f8cSSatish Balay } 254ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 2559566063dSJacob Faibussowitsch PetscCall(PetscFree(atmp)); 256a30f8f8cSSatish Balay #endif 2579566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 25826fbe8dcSKarl Rupp 259f4259b30SLisandro Dalcin baij->garray = NULL; 26026fbe8dcSKarl Rupp 2619566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 2629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 26326fbe8dcSKarl Rupp 264a30f8f8cSSatish Balay baij->B = Bnew; 26526fbe8dcSKarl Rupp 266a30f8f8cSSatish Balay A->was_assembled = PETSC_FALSE; 267a30f8f8cSSatish Balay PetscFunctionReturn(0); 268a30f8f8cSSatish Balay } 269