18c79f6d3SBarry Smith /* 28c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 38c79f6d3SBarry Smith */ 4c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 5aeda0f58SHong Zhang #include <petsc/private/vecimpl.h> 6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 78c79f6d3SBarry Smith 8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9d71ae5a4SJacob Faibussowitsch { 1044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 11*f4f49eeaSPierre Jolivet Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 12b3c64f9dSJunchao Zhang PetscInt i, j, *aj = B->j, *garray; 13b3c64f9dSJunchao Zhang PetscInt ec = 0; /* Number of nonzero external columns */ 141eb62cbbSBarry Smith IS from, to; 151eb62cbbSBarry Smith Vec gvec; 16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 17eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; 18eec179cfSJacob Faibussowitsch PetscHashIter tpos; 19b1d57f15SBarry Smith PetscInt gid, lid; 206f531f54SSatish Balay #else 21d0f46423SBarry Smith PetscInt N = mat->cmap->N, *indices; 222066d6f7SSatish Balay #endif 232066d6f7SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 254b8d542aSHong Zhang if (!aij->garray) { 2628b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 27aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 28c5bfad50SMark F. Adams /* use a table */ 29eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1)); 30d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 312066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 32b1d57f15SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 33eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 34fa46199cSSatish Balay if (!data) { 352066d6f7SSatish Balay /* one based table */ 36c76ffc5fSJacob Faibussowitsch PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec)); 372066d6f7SSatish Balay } 382066d6f7SSatish Balay } 392066d6f7SSatish Balay } 402066d6f7SSatish Balay /* form array of columns we need */ 419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 42eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 43eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 44eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 45eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 46eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 47b0a32e0cSBarry Smith gid--; 48b0a32e0cSBarry Smith lid--; 492066d6f7SSatish Balay garray[lid] = gid; 502066d6f7SSatish Balay } 519566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 52eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 53c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1)); 542066d6f7SSatish Balay /* compact out the extra columns in B */ 55d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 562066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 57b1d57f15SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 58eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 59fa46199cSSatish Balay lid--; 60b3fb0a6cSMatthew Knepley aj[B->i[i] + j] = lid; 612066d6f7SSatish Balay } 622066d6f7SSatish Balay } 639566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 649566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 65eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 662066d6f7SSatish Balay #else 6711285404SBarry Smith /* Make an array as long as the number of columns */ 681eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 699566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 70d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 71d6dfbf8fSBarry Smith for (j = 0; j < B->ilen[i]; j++) { 72bfec09a0SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 73bfec09a0SHong Zhang indices[aj[B->i[i] + j]] = 1; 74416022c9SBarry Smith } 751eb62cbbSBarry Smith } 768c79f6d3SBarry Smith 771eb62cbbSBarry Smith /* form array of columns we need */ 789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 791eb62cbbSBarry Smith ec = 0; 801eb62cbbSBarry Smith for (i = 0; i < N; i++) { 811eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 821eb62cbbSBarry Smith } 831eb62cbbSBarry Smith 841eb62cbbSBarry Smith /* make indices now point into garray */ 85ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 861eb62cbbSBarry Smith 871eb62cbbSBarry Smith /* compact out the extra columns in B */ 88d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 89ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 90d6dfbf8fSBarry Smith } 919566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 929566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 939566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 942066d6f7SSatish Balay #endif 954b8d542aSHong Zhang } else { 964b8d542aSHong Zhang garray = aij->garray; 974b8d542aSHong Zhang } 984b8d542aSHong Zhang 994b8d542aSHong Zhang if (!aij->lvec) { 10028b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 1019566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL)); 1024b8d542aSHong Zhang } 1039566063dSJacob Faibussowitsch PetscCall(VecGetSize(aij->lvec, &ec)); 1041eb62cbbSBarry Smith 105d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 1069566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1079566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 1081eb62cbbSBarry Smith 1091eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 110b5eb4454SBarry Smith /* This does not allocate the array's memory so is efficient */ 1119566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1121eb62cbbSBarry Smith 1132d336d48SLois Curfman McInnes /* generate the scatter context */ 1149566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&aij->Mvctx)); 1159566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx)); 1169566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 11767bb5161SHong Zhang aij->garray = garray; 1184b8d542aSHong Zhang 1199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1219566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1223ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1238c79f6d3SBarry Smith } 1249e25ed09SBarry Smith 125fff043a9SJunchao Zhang /* Disassemble the off-diag portion of the MPIAIJXxx matrix. 126fff043a9SJunchao Zhang In other words, change the B from reduced format using local col ids 127fff043a9SJunchao Zhang to expanded format using global col ids, which will make it easier to 128fff043a9SJunchao Zhang insert new nonzeros (with global col ids) into the matrix. 129fff043a9SJunchao Zhang The off-diag B determines communication in the matrix vector multiply. 1302493cbb0SBarry Smith */ 131d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 132d71ae5a4SJacob Faibussowitsch { 1332493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 1347c612885SStefano Zampini Mat B = aij->B, Bnew = NULL; 1352493cbb0SBarry Smith 1363a40ed3dSBarry Smith PetscFunctionBegin; 1372493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 1389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->lvec)); 139464493b3SBarry Smith if (aij->colmap) { 140aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 141eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&aij->colmap)); 1422066d6f7SSatish Balay #else 1439566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->colmap)); 1442066d6f7SSatish Balay #endif 145464493b3SBarry Smith } 1462493cbb0SBarry Smith 1477c612885SStefano Zampini if (B) { 1487c612885SStefano Zampini Mat_SeqAIJ *Baij = (Mat_SeqAIJ *)B->data; 1497c612885SStefano Zampini PetscInt i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz; 1507c612885SStefano Zampini PetscScalar v; 1517c612885SStefano Zampini const PetscScalar *ba; 1527c612885SStefano Zampini 1532493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1549566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1559566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1562493cbb0SBarry Smith 1572493cbb0SBarry Smith /* invent new B and copy stuff over */ 1589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nz)); 159ad540459SPierre Jolivet for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i]; 1609566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 1619566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */ 1629566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 1639566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz)); 1652205254eSKarl Rupp 166b38c15b3SStefano Zampini if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 167b38c15b3SStefano Zampini ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew; 168b38c15b3SStefano Zampini } 169b38c15b3SStefano Zampini 17077341eacSDmitry Karpeev /* 17177341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 17277341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17377341eacSDmitry Karpeev */ 174f69fde56SShane Stafford Bnew->nonzerostate = B->nonzerostate; 1752205254eSKarl Rupp 1769566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1779566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 1782493cbb0SBarry Smith for (i = 0; i < m; i++) { 179bfec09a0SHong Zhang for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) { 180bfec09a0SHong Zhang col = garray[Baij->j[ct]]; 181fff043a9SJunchao Zhang v = ba[ct++]; 1829566063dSJacob Faibussowitsch PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode)); 1832493cbb0SBarry Smith } 1842493cbb0SBarry Smith } 1859566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 1869566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1877c612885SStefano Zampini } 1887c612885SStefano Zampini PetscCall(PetscFree(aij->garray)); 1892205254eSKarl Rupp 1902493cbb0SBarry Smith aij->B = Bnew; 191227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 192247fa90bSBarry Smith A->assembled = PETSC_FALSE; 1933ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1942493cbb0SBarry Smith } 1952493cbb0SBarry Smith 1962cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 1972cd6534aSBarry Smith 198f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 199f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 2002cd6534aSBarry Smith 201ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 202d71ae5a4SJacob Faibussowitsch { 2032cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */ 204b1d57f15SBarry Smith PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 205b1d57f15SBarry Smith PetscInt *r_rmapd, *r_rmapo; 2062cd6534aSBarry Smith 2072cd6534aSBarry Smith PetscFunctionBegin; 2089566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2099566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2109566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 2112cd6534aSBarry Smith nt = 0; 212992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 213992144d0SBarry Smith if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 2142cd6534aSBarry Smith nt++; 215992144d0SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 2162cd6534aSBarry Smith } 2172cd6534aSBarry Smith } 21808401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 220992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 221ad540459SPierre Jolivet if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 2222cd6534aSBarry Smith } 2239566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2249566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2252cd6534aSBarry Smith 2269566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 227ad540459SPierre Jolivet for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 228992144d0SBarry Smith no = inA->rmap->mapping->n - nt; 2299566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 2302cd6534aSBarry Smith nt = 0; 231992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 232992144d0SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 2332cd6534aSBarry Smith nt++; 234992144d0SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 2352cd6534aSBarry Smith } 2362cd6534aSBarry Smith } 23708401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2389566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 240992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 241ad540459SPierre Jolivet if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 2422cd6534aSBarry Smith } 2439566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2449566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 2453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2462cd6534aSBarry Smith } 2472cd6534aSBarry Smith 248d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) 249d71ae5a4SJacob Faibussowitsch { 2502cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */ 251b1d57f15SBarry Smith PetscInt n, i; 252bca11509SBarry Smith PetscScalar *d, *o; 253bca11509SBarry Smith const PetscScalar *s; 2542cd6534aSBarry Smith 2552cd6534aSBarry Smith PetscFunctionBegin; 25648a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale)); 2572cd6534aSBarry Smith 2589566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2592cd6534aSBarry Smith 2609566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2619566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 2629371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2639566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 2642cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 2659566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2662cd6534aSBarry Smith 2679566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2689566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 2699371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 2722cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2739566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 2743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2752cd6534aSBarry Smith } 276