1be1d678aSKris Buschelman 28c79f6d3SBarry Smith /* 38c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 48c79f6d3SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 6aeda0f58SHong Zhang #include <petsc/private/vecimpl.h> 7af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 88c79f6d3SBarry Smith 9d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10d71ae5a4SJacob Faibussowitsch { 1144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ *)(aij->B->data); 13b3c64f9dSJunchao Zhang PetscInt i, j, *aj = B->j, *garray; 14b3c64f9dSJunchao Zhang PetscInt ec = 0; /* Number of nonzero external columns */ 151eb62cbbSBarry Smith IS from, to; 161eb62cbbSBarry Smith Vec gvec; 17aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 18eec179cfSJacob Faibussowitsch PetscHMapI gid1_lid1 = NULL; 19eec179cfSJacob Faibussowitsch PetscHashIter tpos; 20b1d57f15SBarry Smith PetscInt gid, lid; 216f531f54SSatish Balay #else 22d0f46423SBarry Smith PetscInt N = mat->cmap->N, *indices; 232066d6f7SSatish Balay #endif 242066d6f7SSatish Balay 253a40ed3dSBarry Smith PetscFunctionBegin; 264b8d542aSHong Zhang if (!aij->garray) { 2728b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 28aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 29c5bfad50SMark F. Adams /* use a table */ 30eec179cfSJacob Faibussowitsch PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1)); 31d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 322066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 33b1d57f15SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 34eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 35fa46199cSSatish Balay if (!data) { 362066d6f7SSatish Balay /* one based table */ 37c76ffc5fSJacob Faibussowitsch PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec)); 382066d6f7SSatish Balay } 392066d6f7SSatish Balay } 402066d6f7SSatish Balay } 412066d6f7SSatish Balay /* form array of columns we need */ 429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 43eec179cfSJacob Faibussowitsch PetscHashIterBegin(gid1_lid1, tpos); 44eec179cfSJacob Faibussowitsch while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 45eec179cfSJacob Faibussowitsch PetscHashIterGetKey(gid1_lid1, tpos, gid); 46eec179cfSJacob Faibussowitsch PetscHashIterGetVal(gid1_lid1, tpos, lid); 47eec179cfSJacob Faibussowitsch PetscHashIterNext(gid1_lid1, tpos); 48b0a32e0cSBarry Smith gid--; 49b0a32e0cSBarry Smith lid--; 502066d6f7SSatish Balay garray[lid] = gid; 512066d6f7SSatish Balay } 529566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 53eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIClear(gid1_lid1)); 54c76ffc5fSJacob Faibussowitsch for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1)); 552066d6f7SSatish Balay /* compact out the extra columns in B */ 56d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 572066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 58b1d57f15SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 59eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 60fa46199cSSatish Balay lid--; 61b3fb0a6cSMatthew Knepley aj[B->i[i] + j] = lid; 622066d6f7SSatish Balay } 632066d6f7SSatish Balay } 649566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 659566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 66eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&gid1_lid1)); 672066d6f7SSatish Balay #else 6811285404SBarry Smith /* Make an array as long as the number of columns */ 691eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 709566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 71d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 72d6dfbf8fSBarry Smith for (j = 0; j < B->ilen[i]; j++) { 73bfec09a0SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 74bfec09a0SHong Zhang indices[aj[B->i[i] + j]] = 1; 75416022c9SBarry Smith } 761eb62cbbSBarry Smith } 778c79f6d3SBarry Smith 781eb62cbbSBarry Smith /* form array of columns we need */ 799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 801eb62cbbSBarry Smith ec = 0; 811eb62cbbSBarry Smith for (i = 0; i < N; i++) { 821eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 831eb62cbbSBarry Smith } 841eb62cbbSBarry Smith 851eb62cbbSBarry Smith /* make indices now point into garray */ 86ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 871eb62cbbSBarry Smith 881eb62cbbSBarry Smith /* compact out the extra columns in B */ 89d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 90ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 91d6dfbf8fSBarry Smith } 929566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 939566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 949566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 952066d6f7SSatish Balay #endif 964b8d542aSHong Zhang } else { 974b8d542aSHong Zhang garray = aij->garray; 984b8d542aSHong Zhang } 994b8d542aSHong Zhang 1004b8d542aSHong Zhang if (!aij->lvec) { 10128b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 1029566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL)); 1034b8d542aSHong Zhang } 1049566063dSJacob Faibussowitsch PetscCall(VecGetSize(aij->lvec, &ec)); 1051eb62cbbSBarry Smith 106d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 1079566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1089566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 1091eb62cbbSBarry Smith 1101eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 111b5eb4454SBarry Smith /* This does not allocate the array's memory so is efficient */ 1129566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1131eb62cbbSBarry Smith 1142d336d48SLois Curfman McInnes /* generate the scatter context */ 1159566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&aij->Mvctx)); 1169566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx)); 1179566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 11867bb5161SHong Zhang aij->garray = garray; 1194b8d542aSHong Zhang 1209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1219566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1229566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1248c79f6d3SBarry Smith } 1259e25ed09SBarry Smith 126fff043a9SJunchao Zhang /* Disassemble the off-diag portion of the MPIAIJXxx matrix. 127fff043a9SJunchao Zhang In other words, change the B from reduced format using local col ids 128fff043a9SJunchao Zhang to expanded format using global col ids, which will make it easier to 129fff043a9SJunchao Zhang insert new nonzeros (with global col ids) into the matrix. 130fff043a9SJunchao Zhang The off-diag B determines communication in the matrix vector multiply. 1312493cbb0SBarry Smith */ 132d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 133d71ae5a4SJacob Faibussowitsch { 1342493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 135*7c612885SStefano Zampini Mat B = aij->B, Bnew = NULL; 1362493cbb0SBarry Smith 1373a40ed3dSBarry Smith PetscFunctionBegin; 1382493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 1399566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->lvec)); 140464493b3SBarry Smith if (aij->colmap) { 141aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 142eec179cfSJacob Faibussowitsch PetscCall(PetscHMapIDestroy(&aij->colmap)); 1432066d6f7SSatish Balay #else 1449566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->colmap)); 1452066d6f7SSatish Balay #endif 146464493b3SBarry Smith } 1472493cbb0SBarry Smith 148*7c612885SStefano Zampini if (B) { 149*7c612885SStefano Zampini Mat_SeqAIJ *Baij = (Mat_SeqAIJ *)B->data; 150*7c612885SStefano Zampini PetscInt i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz; 151*7c612885SStefano Zampini PetscScalar v; 152*7c612885SStefano Zampini const PetscScalar *ba; 153*7c612885SStefano Zampini 1542493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1559566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1569566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1572493cbb0SBarry Smith 1582493cbb0SBarry Smith /* invent new B and copy stuff over */ 1599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nz)); 160ad540459SPierre Jolivet for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i]; 1619566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 1629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */ 1639566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 1649566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz)); 1662205254eSKarl Rupp 167b38c15b3SStefano Zampini if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 168b38c15b3SStefano Zampini ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew; 169b38c15b3SStefano Zampini } 170b38c15b3SStefano Zampini 17177341eacSDmitry Karpeev /* 17277341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 17377341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17477341eacSDmitry Karpeev */ 175f69fde56SShane Stafford Bnew->nonzerostate = B->nonzerostate; 1762205254eSKarl Rupp 1779566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1789566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 1792493cbb0SBarry Smith for (i = 0; i < m; i++) { 180bfec09a0SHong Zhang for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) { 181bfec09a0SHong Zhang col = garray[Baij->j[ct]]; 182fff043a9SJunchao Zhang v = ba[ct++]; 1839566063dSJacob Faibussowitsch PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode)); 1842493cbb0SBarry Smith } 1852493cbb0SBarry Smith } 1869566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 1879566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 188*7c612885SStefano Zampini } 189*7c612885SStefano Zampini PetscCall(PetscFree(aij->garray)); 1902205254eSKarl Rupp 1912493cbb0SBarry Smith aij->B = Bnew; 192227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 193247fa90bSBarry Smith A->assembled = PETSC_FALSE; 1943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1952493cbb0SBarry Smith } 1962493cbb0SBarry Smith 1972cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 1982cd6534aSBarry Smith 199f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 200f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 2012cd6534aSBarry Smith 202d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 203d71ae5a4SJacob Faibussowitsch { 2042cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */ 205b1d57f15SBarry Smith PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 206b1d57f15SBarry Smith PetscInt *r_rmapd, *r_rmapo; 2072cd6534aSBarry Smith 2082cd6534aSBarry Smith PetscFunctionBegin; 2099566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2109566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2119566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 2122cd6534aSBarry Smith nt = 0; 213992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 214992144d0SBarry Smith if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 2152cd6534aSBarry Smith nt++; 216992144d0SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 2172cd6534aSBarry Smith } 2182cd6534aSBarry Smith } 21908401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 221992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 222ad540459SPierre Jolivet if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 2232cd6534aSBarry Smith } 2249566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2259566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2262cd6534aSBarry Smith 2279566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 228ad540459SPierre Jolivet for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 229992144d0SBarry Smith no = inA->rmap->mapping->n - nt; 2309566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 2312cd6534aSBarry Smith nt = 0; 232992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 233992144d0SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 2342cd6534aSBarry Smith nt++; 235992144d0SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 2362cd6534aSBarry Smith } 2372cd6534aSBarry Smith } 23808401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2399566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 241992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 242ad540459SPierre Jolivet if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 2432cd6534aSBarry Smith } 2449566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2459566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 2463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2472cd6534aSBarry Smith } 2482cd6534aSBarry Smith 249d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A, Vec scale) 250d71ae5a4SJacob Faibussowitsch { 25192b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 25292b32695SKris Buschelman 25392b32695SKris Buschelman PetscFunctionBegin; 254cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 2553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 25692b32695SKris Buschelman } 25792b32695SKris Buschelman 258d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) 259d71ae5a4SJacob Faibussowitsch { 2602cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */ 261b1d57f15SBarry Smith PetscInt n, i; 262bca11509SBarry Smith PetscScalar *d, *o; 263bca11509SBarry Smith const PetscScalar *s; 2642cd6534aSBarry Smith 2652cd6534aSBarry Smith PetscFunctionBegin; 26648a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale)); 2672cd6534aSBarry Smith 2689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2692cd6534aSBarry Smith 2709566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2719566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 2729371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 2742cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 2759566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2762cd6534aSBarry Smith 2779566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 2799371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 2822cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2839566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 2843ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2852cd6534aSBarry Smith } 286