1d4002b98SHong Zhang /* 2d4002b98SHong Zhang Support for the parallel SELL matrix vector multiply 3d4002b98SHong Zhang */ 4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> 5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6d4002b98SHong Zhang 7d4002b98SHong Zhang /* 8d4002b98SHong Zhang Takes the local part of an already assembled MPISELL matrix 9d4002b98SHong Zhang and disassembles it. This is to allow new nonzeros into the matrix 10d4002b98SHong Zhang that require more communication in the matrix vector multiply. 11d4002b98SHong Zhang Thus certain data-structures must be rebuilt. 12d4002b98SHong Zhang 13d4002b98SHong Zhang Kind of slow! But that's what application programmers get when 14d4002b98SHong Zhang they are sloppy. 15d4002b98SHong Zhang */ 16*9371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPISELL(Mat A) { 17d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)A->data; 18d4002b98SHong Zhang Mat B = sell->B, Bnew; 19d4002b98SHong Zhang Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data; 20d4002b98SHong Zhang PetscInt i, j, totalslices, N = A->cmap->N, ec, row; 21d4002b98SHong Zhang PetscBool isnonzero; 22d4002b98SHong Zhang 23d4002b98SHong Zhang PetscFunctionBegin; 24d4002b98SHong Zhang /* free stuff related to matrix-vec multiply */ 259566063dSJacob Faibussowitsch PetscCall(VecGetSize(sell->lvec, &ec)); /* needed for PetscLogObjectMemory below */ 269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&sell->lvec)); 279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sell->Mvctx)); 28d4002b98SHong Zhang if (sell->colmap) { 29d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 309566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&sell->colmap)); 31d4002b98SHong Zhang #else 329566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->colmap)); 339566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -sell->B->cmap->n * sizeof(PetscInt))); 34d4002b98SHong Zhang #endif 35d4002b98SHong Zhang } 36d4002b98SHong Zhang 37d4002b98SHong Zhang /* make sure that B is assembled so we can access its values */ 389566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 399566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 40d4002b98SHong Zhang 41d4002b98SHong Zhang /* invent new B and copy stuff over */ 429566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 439566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N)); 449566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 459566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 469566063dSJacob Faibussowitsch PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen)); 47b38c15b3SStefano Zampini if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 48b38c15b3SStefano Zampini ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew; 49b38c15b3SStefano Zampini } 50d4002b98SHong Zhang 51d4002b98SHong Zhang /* 52d4002b98SHong Zhang Ensure that B's nonzerostate is monotonically increasing. 53d4002b98SHong Zhang Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 54d4002b98SHong Zhang */ 55d4002b98SHong Zhang Bnew->nonzerostate = B->nonzerostate; 56d4002b98SHong Zhang 57d4002b98SHong Zhang totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 58d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 59d4002b98SHong Zhang for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) { 60d4002b98SHong Zhang isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]); 61*9371c9d4SSatish Balay if (isnonzero) { PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); } 62d4002b98SHong Zhang } 63d4002b98SHong Zhang } 64d4002b98SHong Zhang 659566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->garray)); 669566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt))); 679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 689566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew)); 69d4002b98SHong Zhang 70d4002b98SHong Zhang sell->B = Bnew; 71d4002b98SHong Zhang A->was_assembled = PETSC_FALSE; 72d4002b98SHong Zhang A->assembled = PETSC_FALSE; 73d4002b98SHong Zhang PetscFunctionReturn(0); 74d4002b98SHong Zhang } 75d4002b98SHong Zhang 76*9371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) { 77d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 78d4002b98SHong Zhang Mat_SeqSELL *B = (Mat_SeqSELL *)(sell->B->data); 79d4002b98SHong Zhang PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices; 80d4002b98SHong Zhang IS from, to; 81d4002b98SHong Zhang Vec gvec; 82d4002b98SHong Zhang PetscBool isnonzero; 83d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 84d4002b98SHong Zhang PetscTable gid1_lid1; 85d4002b98SHong Zhang PetscTablePosition tpos; 86d4002b98SHong Zhang PetscInt gid, lid; 87d4002b98SHong Zhang #else 88d4002b98SHong Zhang PetscInt N = mat->cmap->N, *indices; 89d4002b98SHong Zhang #endif 90d4002b98SHong Zhang 91d4002b98SHong Zhang PetscFunctionBegin; 92d4002b98SHong Zhang totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 93d4002b98SHong Zhang 94d4002b98SHong Zhang /* ec counts the number of columns that contain nonzeros */ 95d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 96d4002b98SHong Zhang /* use a table */ 979566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1)); 98d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 99d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 100d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 101d4002b98SHong Zhang if (isnonzero) { /* check the mask bit */ 102d4002b98SHong Zhang PetscInt data, gid1 = bcolidx[j] + 1; 1039566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 104d4002b98SHong Zhang if (!data) { 105d4002b98SHong Zhang /* one based table */ 1069566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 107d4002b98SHong Zhang } 108d4002b98SHong Zhang } 109d4002b98SHong Zhang } 110d4002b98SHong Zhang } 111d4002b98SHong Zhang 112d4002b98SHong Zhang /* form array of columns we need */ 1139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 1149566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 115d4002b98SHong Zhang while (tpos) { 1169566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 117d4002b98SHong Zhang gid--; 118d4002b98SHong Zhang lid--; 119d4002b98SHong Zhang garray[lid] = gid; 120d4002b98SHong Zhang } 1219566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 1229566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 123*9371c9d4SSatish Balay for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); } 124d4002b98SHong Zhang 125d4002b98SHong Zhang /* compact out the extra columns in B */ 126d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 127d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 128d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 129d4002b98SHong Zhang if (isnonzero) { 130d4002b98SHong Zhang PetscInt gid1 = bcolidx[j] + 1; 1319566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 132d4002b98SHong Zhang lid--; 133d4002b98SHong Zhang bcolidx[j] = lid; 134d4002b98SHong Zhang } 135d4002b98SHong Zhang } 136d4002b98SHong Zhang } 1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1389566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 1399566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 140d4002b98SHong Zhang #else 141d4002b98SHong Zhang /* Make an array as long as the number of columns */ 1429566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 143d4002b98SHong Zhang /* mark those columns that are in sell->B */ 144d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 145d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 146d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 147d4002b98SHong Zhang if (isnonzero) { 148d4002b98SHong Zhang if (!indices[bcolidx[j]]) ec++; 149d4002b98SHong Zhang indices[bcolidx[j]] = 1; 150d4002b98SHong Zhang } 151d4002b98SHong Zhang } 152d4002b98SHong Zhang } 153d4002b98SHong Zhang 154d4002b98SHong Zhang /* form array of columns we need */ 1559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 156d4002b98SHong Zhang ec = 0; 157d4002b98SHong Zhang for (i = 0; i < N; i++) { 158d4002b98SHong Zhang if (indices[i]) garray[ec++] = i; 159d4002b98SHong Zhang } 160d4002b98SHong Zhang 161d4002b98SHong Zhang /* make indices now point into garray */ 162*9371c9d4SSatish Balay for (i = 0; i < ec; i++) { indices[garray[i]] = i; } 163d4002b98SHong Zhang 164d4002b98SHong Zhang /* compact out the extra columns in B */ 165d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 166d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 167d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 168d4002b98SHong Zhang if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 169d4002b98SHong Zhang } 170d4002b98SHong Zhang } 1719566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1729566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 1739566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 174d4002b98SHong Zhang #endif 175d4002b98SHong Zhang /* create local vector that is used to scatter into */ 1769566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec)); 177d4002b98SHong Zhang /* create two temporary Index sets for build scatter gather */ 1789566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1799566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 180d4002b98SHong Zhang 181d4002b98SHong Zhang /* create temporary global vector to generate scatter context */ 182d4002b98SHong Zhang /* This does not allocate the array's memory so is efficient */ 1839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 184d4002b98SHong Zhang 185d4002b98SHong Zhang /* generate the scatter context */ 1869566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx)); 1879566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 1889566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->Mvctx)); 1899566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->lvec)); 1909566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from)); 1919566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to)); 192d4002b98SHong Zhang 193d4002b98SHong Zhang sell->garray = garray; 194d4002b98SHong Zhang 1959566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt))); 1969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 199d4002b98SHong Zhang PetscFunctionReturn(0); 200d4002b98SHong Zhang } 201d4002b98SHong Zhang 202d4002b98SHong Zhang /* ugly stuff added for Glenn someday we should fix this up */ 203f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 204f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 205d4002b98SHong Zhang 206*9371c9d4SSatish Balay PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) { 207d4002b98SHong Zhang Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */ 208d4002b98SHong Zhang PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 209d4002b98SHong Zhang PetscInt *r_rmapd, *r_rmapo; 210d4002b98SHong Zhang 211d4002b98SHong Zhang PetscFunctionBegin; 2129566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2139566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2149566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 215d4002b98SHong Zhang nt = 0; 216d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 217d4002b98SHong Zhang if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 218d4002b98SHong Zhang nt++; 219d4002b98SHong Zhang r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 220d4002b98SHong Zhang } 221d4002b98SHong Zhang } 22208401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 224d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 225*9371c9d4SSatish Balay if (r_rmapd[i]) { auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; } 226d4002b98SHong Zhang } 2279566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2289566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2299566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 230*9371c9d4SSatish Balay for (i = 0; i < ina->B->cmap->n; i++) { lindices[garray[i]] = i + 1; } 231d4002b98SHong Zhang no = inA->rmap->mapping->n - nt; 2329566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 233d4002b98SHong Zhang nt = 0; 234d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 235d4002b98SHong Zhang if (lindices[inA->rmap->mapping->indices[i]]) { 236d4002b98SHong Zhang nt++; 237d4002b98SHong Zhang r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 238d4002b98SHong Zhang } 239d4002b98SHong Zhang } 24008401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2419566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 243d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 244*9371c9d4SSatish Balay if (r_rmapo[i]) { auglyrmapo[(r_rmapo[i] - 1)] = i; } 245d4002b98SHong Zhang } 2469566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2479566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 248d4002b98SHong Zhang PetscFunctionReturn(0); 249d4002b98SHong Zhang } 250d4002b98SHong Zhang 251*9371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) { 252d4002b98SHong Zhang Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */ 253d4002b98SHong Zhang PetscInt n, i; 254d4002b98SHong Zhang PetscScalar *d, *o; 255d4002b98SHong Zhang const PetscScalar *s; 256d4002b98SHong Zhang 257d4002b98SHong Zhang PetscFunctionBegin; 258*9371c9d4SSatish Balay if (!auglyrmapd) { PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); } 2599566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2609566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2619566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 262*9371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 264d4002b98SHong Zhang /* column scale "diagonal" portion of local matrix */ 2659566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2669566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2679566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 268*9371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 271d4002b98SHong Zhang /* column scale "off-diagonal" portion of local matrix */ 2729566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 273d4002b98SHong Zhang PetscFunctionReturn(0); 274d4002b98SHong Zhang } 275