1d4002b98SHong Zhang /* 2d4002b98SHong Zhang Support for the parallel SELL matrix vector multiply 3d4002b98SHong Zhang */ 4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> 5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6d4002b98SHong Zhang 7d4002b98SHong Zhang /* 8d4002b98SHong Zhang Takes the local part of an already assembled MPISELL matrix 9d4002b98SHong Zhang and disassembles it. This is to allow new nonzeros into the matrix 10d4002b98SHong Zhang that require more communication in the matrix vector multiply. 11d4002b98SHong Zhang Thus certain data-structures must be rebuilt. 12d4002b98SHong Zhang 13d4002b98SHong Zhang Kind of slow! But that's what application programmers get when 14d4002b98SHong Zhang they are sloppy. 15d4002b98SHong Zhang */ 16d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISELL(Mat A) 17d71ae5a4SJacob Faibussowitsch { 18d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)A->data; 19d4002b98SHong Zhang Mat B = sell->B, Bnew; 20d4002b98SHong Zhang Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data; 214dfa11a4SJacob Faibussowitsch PetscInt i, j, totalslices, N = A->cmap->N, row; 22d4002b98SHong Zhang PetscBool isnonzero; 23d4002b98SHong Zhang 24d4002b98SHong Zhang PetscFunctionBegin; 25d4002b98SHong Zhang /* free stuff related to matrix-vec multiply */ 269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&sell->lvec)); 279566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sell->Mvctx)); 28d4002b98SHong Zhang if (sell->colmap) { 29d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 30*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&sell->colmap)); 31d4002b98SHong Zhang #else 329566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->colmap)); 33d4002b98SHong Zhang #endif 34d4002b98SHong Zhang } 35d4002b98SHong Zhang 36d4002b98SHong Zhang /* make sure that B is assembled so we can access its values */ 379566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 389566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 39d4002b98SHong Zhang 40d4002b98SHong Zhang /* invent new B and copy stuff over */ 419566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 429566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N)); 439566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 449566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 459566063dSJacob Faibussowitsch PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen)); 46b38c15b3SStefano Zampini if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 47b38c15b3SStefano Zampini ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew; 48b38c15b3SStefano Zampini } 49d4002b98SHong Zhang 50d4002b98SHong Zhang /* 51d4002b98SHong Zhang Ensure that B's nonzerostate is monotonically increasing. 52d4002b98SHong Zhang Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 53d4002b98SHong Zhang */ 54d4002b98SHong Zhang Bnew->nonzerostate = B->nonzerostate; 55d4002b98SHong Zhang 56d4002b98SHong Zhang totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 57d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 58d4002b98SHong Zhang for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) { 59d4002b98SHong Zhang isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]); 6048a46eb9SPierre Jolivet if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); 61d4002b98SHong Zhang } 62d4002b98SHong Zhang } 63d4002b98SHong Zhang 649566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->garray)); 659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 66d4002b98SHong Zhang 67d4002b98SHong Zhang sell->B = Bnew; 68d4002b98SHong Zhang A->was_assembled = PETSC_FALSE; 69d4002b98SHong Zhang A->assembled = PETSC_FALSE; 70d4002b98SHong Zhang PetscFunctionReturn(0); 71d4002b98SHong Zhang } 72d4002b98SHong Zhang 73d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) 74d71ae5a4SJacob Faibussowitsch { 75d4002b98SHong Zhang Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 76d4002b98SHong Zhang Mat_SeqSELL *B = (Mat_SeqSELL *)(sell->B->data); 77d4002b98SHong Zhang PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices; 78d4002b98SHong Zhang IS from, to; 79d4002b98SHong Zhang Vec gvec; 80d4002b98SHong Zhang PetscBool isnonzero; 81d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 82*eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; 83*eec179cfSJacob Faibussowitsch PetscHashIter tpos; 84d4002b98SHong Zhang PetscInt gid, lid; 85d4002b98SHong Zhang #else 86d4002b98SHong Zhang PetscInt N = mat->cmap->N, *indices; 87d4002b98SHong Zhang #endif 88d4002b98SHong Zhang 89d4002b98SHong Zhang PetscFunctionBegin; 90d4002b98SHong Zhang totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 91d4002b98SHong Zhang 92d4002b98SHong Zhang /* ec counts the number of columns that contain nonzeros */ 93d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 94d4002b98SHong Zhang /* use a table */ 95*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(sell->B->rmap->n, &gid1_lid1)); 96d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 97d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 98d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 99d4002b98SHong Zhang if (isnonzero) { /* check the mask bit */ 100d4002b98SHong Zhang PetscInt data, gid1 = bcolidx[j] + 1; 101*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 102d4002b98SHong Zhang if (!data) { 103d4002b98SHong Zhang /* one based table */ 104*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapISetWithMode(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 105d4002b98SHong Zhang } 106d4002b98SHong Zhang } 107d4002b98SHong Zhang } 108d4002b98SHong Zhang } 109d4002b98SHong Zhang 110d4002b98SHong Zhang /* form array of columns we need */ 1119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 112*eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 113*eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 114*eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 115*eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 116*eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 117d4002b98SHong Zhang gid--; 118d4002b98SHong Zhang lid--; 119d4002b98SHong Zhang garray[lid] = gid; 120d4002b98SHong Zhang } 1219566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 122*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 123*eec179cfSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISetWithMode(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 124d4002b98SHong Zhang 125d4002b98SHong Zhang /* compact out the extra columns in B */ 126d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 127d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 128d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 129d4002b98SHong Zhang if (isnonzero) { 130d4002b98SHong Zhang PetscInt gid1 = bcolidx[j] + 1; 131*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 132d4002b98SHong Zhang lid--; 133d4002b98SHong Zhang bcolidx[j] = lid; 134d4002b98SHong Zhang } 135d4002b98SHong Zhang } 136d4002b98SHong Zhang } 1379566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1389566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 139*eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 140d4002b98SHong Zhang #else 141d4002b98SHong Zhang /* Make an array as long as the number of columns */ 1429566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 143d4002b98SHong Zhang /* mark those columns that are in sell->B */ 144d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 145d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 146d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 147d4002b98SHong Zhang if (isnonzero) { 148d4002b98SHong Zhang if (!indices[bcolidx[j]]) ec++; 149d4002b98SHong Zhang indices[bcolidx[j]] = 1; 150d4002b98SHong Zhang } 151d4002b98SHong Zhang } 152d4002b98SHong Zhang } 153d4002b98SHong Zhang 154d4002b98SHong Zhang /* form array of columns we need */ 1559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 156d4002b98SHong Zhang ec = 0; 157d4002b98SHong Zhang for (i = 0; i < N; i++) { 158d4002b98SHong Zhang if (indices[i]) garray[ec++] = i; 159d4002b98SHong Zhang } 160d4002b98SHong Zhang 161d4002b98SHong Zhang /* make indices now point into garray */ 162ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 163d4002b98SHong Zhang 164d4002b98SHong Zhang /* compact out the extra columns in B */ 165d4002b98SHong Zhang for (i = 0; i < totalslices; i++) { /* loop over slices */ 166d4002b98SHong Zhang for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 167d4002b98SHong Zhang isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 168d4002b98SHong Zhang if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 169d4002b98SHong Zhang } 170d4002b98SHong Zhang } 1719566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1729566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 1739566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 174d4002b98SHong Zhang #endif 175d4002b98SHong Zhang /* create local vector that is used to scatter into */ 1769566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec)); 177d4002b98SHong Zhang /* create two temporary Index sets for build scatter gather */ 1789566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1799566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 180d4002b98SHong Zhang 181d4002b98SHong Zhang /* create temporary global vector to generate scatter context */ 182d4002b98SHong Zhang /* This does not allocate the array's memory so is efficient */ 1839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 184d4002b98SHong Zhang 185d4002b98SHong Zhang /* generate the scatter context */ 1869566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx)); 1879566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 188d4002b98SHong Zhang 189d4002b98SHong Zhang sell->garray = garray; 190d4002b98SHong Zhang 1919566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1929566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 194d4002b98SHong Zhang PetscFunctionReturn(0); 195d4002b98SHong Zhang } 196d4002b98SHong Zhang 197d4002b98SHong Zhang /* ugly stuff added for Glenn someday we should fix this up */ 198f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 199f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 200d4002b98SHong Zhang 201d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) 202d71ae5a4SJacob Faibussowitsch { 203d4002b98SHong Zhang Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */ 204d4002b98SHong Zhang PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 205d4002b98SHong Zhang PetscInt *r_rmapd, *r_rmapo; 206d4002b98SHong Zhang 207d4002b98SHong Zhang PetscFunctionBegin; 2089566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2099566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2109566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 211d4002b98SHong Zhang nt = 0; 212d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 213d4002b98SHong Zhang if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 214d4002b98SHong Zhang nt++; 215d4002b98SHong Zhang r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 216d4002b98SHong Zhang } 217d4002b98SHong Zhang } 21808401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 220d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 221ad540459SPierre Jolivet if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 222d4002b98SHong Zhang } 2239566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2249566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2259566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 226ad540459SPierre Jolivet for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 227d4002b98SHong Zhang no = inA->rmap->mapping->n - nt; 2289566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 229d4002b98SHong Zhang nt = 0; 230d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 231d4002b98SHong Zhang if (lindices[inA->rmap->mapping->indices[i]]) { 232d4002b98SHong Zhang nt++; 233d4002b98SHong Zhang r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 234d4002b98SHong Zhang } 235d4002b98SHong Zhang } 23608401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2379566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 239d4002b98SHong Zhang for (i = 0; i < inA->rmap->mapping->n; i++) { 240ad540459SPierre Jolivet if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 241d4002b98SHong Zhang } 2429566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2439566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 244d4002b98SHong Zhang PetscFunctionReturn(0); 245d4002b98SHong Zhang } 246d4002b98SHong Zhang 247d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) 248d71ae5a4SJacob Faibussowitsch { 249d4002b98SHong Zhang Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */ 250d4002b98SHong Zhang PetscInt n, i; 251d4002b98SHong Zhang PetscScalar *d, *o; 252d4002b98SHong Zhang const PetscScalar *s; 253d4002b98SHong Zhang 254d4002b98SHong Zhang PetscFunctionBegin; 25548a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); 2569566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2579566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2589566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 2599371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 261d4002b98SHong Zhang /* column scale "diagonal" portion of local matrix */ 2629566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2639566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2649566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 2659371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2669566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2679566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 268d4002b98SHong Zhang /* column scale "off-diagonal" portion of local matrix */ 2699566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 270d4002b98SHong Zhang PetscFunctionReturn(0); 271d4002b98SHong Zhang } 272