1be1d678aSKris Buschelman 28c79f6d3SBarry Smith /* 38c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 48c79f6d3SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 6aeda0f58SHong Zhang #include <petsc/private/vecimpl.h> 7af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 88c79f6d3SBarry Smith 9d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10d71ae5a4SJacob Faibussowitsch { 1144a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ *)(aij->B->data); 13b3c64f9dSJunchao Zhang PetscInt i, j, *aj = B->j, *garray; 14b3c64f9dSJunchao Zhang PetscInt ec = 0; /* Number of nonzero external columns */ 151eb62cbbSBarry Smith IS from, to; 161eb62cbbSBarry Smith Vec gvec; 17aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 180f5bd95cSBarry Smith PetscTable gid1_lid1; 190f5bd95cSBarry Smith PetscTablePosition tpos; 20b1d57f15SBarry Smith PetscInt gid, lid; 216f531f54SSatish Balay #else 22d0f46423SBarry Smith PetscInt N = mat->cmap->N, *indices; 232066d6f7SSatish Balay #endif 242066d6f7SSatish Balay 253a40ed3dSBarry Smith PetscFunctionBegin; 264b8d542aSHong Zhang if (!aij->garray) { 2728b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 28aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 29c5bfad50SMark F. Adams /* use a table */ 309566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(aij->B->rmap->n, mat->cmap->N + 1, &gid1_lid1)); 31d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 322066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 33b1d57f15SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 349566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 35fa46199cSSatish Balay if (!data) { 362066d6f7SSatish Balay /* one based table */ 379566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 382066d6f7SSatish Balay } 392066d6f7SSatish Balay } 402066d6f7SSatish Balay } 412066d6f7SSatish Balay /* form array of columns we need */ 429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 439566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 442066d6f7SSatish Balay while (tpos) { 459566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 46b0a32e0cSBarry Smith gid--; 47b0a32e0cSBarry Smith lid--; 482066d6f7SSatish Balay garray[lid] = gid; 492066d6f7SSatish Balay } 509566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 519566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 5248a46eb9SPierre Jolivet for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 532066d6f7SSatish Balay /* compact out the extra columns in B */ 54d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 552066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 56b1d57f15SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 579566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 58fa46199cSSatish Balay lid--; 59b3fb0a6cSMatthew Knepley aj[B->i[i] + j] = lid; 602066d6f7SSatish Balay } 612066d6f7SSatish Balay } 629566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 639566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 649566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 652066d6f7SSatish Balay #else 6611285404SBarry Smith /* Make an array as long as the number of columns */ 671eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 689566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 69d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 70d6dfbf8fSBarry Smith for (j = 0; j < B->ilen[i]; j++) { 71bfec09a0SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 72bfec09a0SHong Zhang indices[aj[B->i[i] + j]] = 1; 73416022c9SBarry Smith } 741eb62cbbSBarry Smith } 758c79f6d3SBarry Smith 761eb62cbbSBarry Smith /* form array of columns we need */ 779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 781eb62cbbSBarry Smith ec = 0; 791eb62cbbSBarry Smith for (i = 0; i < N; i++) { 801eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 811eb62cbbSBarry Smith } 821eb62cbbSBarry Smith 831eb62cbbSBarry Smith /* make indices now point into garray */ 84ad540459SPierre Jolivet for (i = 0; i < ec; i++) indices[garray[i]] = i; 851eb62cbbSBarry Smith 861eb62cbbSBarry Smith /* compact out the extra columns in B */ 87d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 88ad540459SPierre Jolivet for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 89d6dfbf8fSBarry Smith } 909566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 919566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 929566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 932066d6f7SSatish Balay #endif 944b8d542aSHong Zhang } else { 954b8d542aSHong Zhang garray = aij->garray; 964b8d542aSHong Zhang } 974b8d542aSHong Zhang 984b8d542aSHong Zhang if (!aij->lvec) { 9928b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 1009566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL)); 1014b8d542aSHong Zhang } 1029566063dSJacob Faibussowitsch PetscCall(VecGetSize(aij->lvec, &ec)); 1031eb62cbbSBarry Smith 104d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 1059566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1069566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 1071eb62cbbSBarry Smith 1081eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 109b5eb4454SBarry Smith /* This does not allocate the array's memory so is efficient */ 1109566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1111eb62cbbSBarry Smith 1122d336d48SLois Curfman McInnes /* generate the scatter context */ 1139566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&aij->Mvctx)); 1149566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx)); 1159566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 11667bb5161SHong Zhang aij->garray = garray; 1174b8d542aSHong Zhang 1189566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1209566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1213a40ed3dSBarry Smith PetscFunctionReturn(0); 1228c79f6d3SBarry Smith } 1239e25ed09SBarry Smith 124fff043a9SJunchao Zhang /* Disassemble the off-diag portion of the MPIAIJXxx matrix. 125fff043a9SJunchao Zhang In other words, change the B from reduced format using local col ids 126fff043a9SJunchao Zhang to expanded format using global col ids, which will make it easier to 127fff043a9SJunchao Zhang insert new nonzeros (with global col ids) into the matrix. 128fff043a9SJunchao Zhang The off-diag B determines communication in the matrix vector multiply. 1292493cbb0SBarry Smith */ 130d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 131d71ae5a4SJacob Faibussowitsch { 1322493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 1332493cbb0SBarry Smith Mat B = aij->B, Bnew; 134ec8511deSBarry Smith Mat_SeqAIJ *Baij = (Mat_SeqAIJ *)B->data; 1354dfa11a4SJacob Faibussowitsch PetscInt i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz; 13687828ca2SBarry Smith PetscScalar v; 137fff043a9SJunchao Zhang const PetscScalar *ba; 1382493cbb0SBarry Smith 1393a40ed3dSBarry Smith PetscFunctionBegin; 1402493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 1419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->lvec)); 142464493b3SBarry Smith if (aij->colmap) { 143aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 1449566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&aij->colmap)); 1452066d6f7SSatish Balay #else 1469566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->colmap)); 1472066d6f7SSatish Balay #endif 148464493b3SBarry Smith } 1492493cbb0SBarry Smith 1502493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1519566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1529566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1532493cbb0SBarry Smith 1542493cbb0SBarry Smith /* invent new B and copy stuff over */ 1559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nz)); 156ad540459SPierre Jolivet for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i]; 1579566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 1589566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */ 1599566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 1609566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1619566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz)); 1622205254eSKarl Rupp 163b38c15b3SStefano Zampini if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 164b38c15b3SStefano Zampini ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew; 165b38c15b3SStefano Zampini } 166b38c15b3SStefano Zampini 16777341eacSDmitry Karpeev /* 16877341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 16977341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17077341eacSDmitry Karpeev */ 171f69fde56SShane Stafford Bnew->nonzerostate = B->nonzerostate; 1722205254eSKarl Rupp 1739566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1749566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 1752493cbb0SBarry Smith for (i = 0; i < m; i++) { 176bfec09a0SHong Zhang for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) { 177bfec09a0SHong Zhang col = garray[Baij->j[ct]]; 178fff043a9SJunchao Zhang v = ba[ct++]; 1799566063dSJacob Faibussowitsch PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode)); 1802493cbb0SBarry Smith } 1812493cbb0SBarry Smith } 1829566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 183fff043a9SJunchao Zhang 1849566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->garray)); 1859566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1862205254eSKarl Rupp 1872493cbb0SBarry Smith aij->B = Bnew; 188227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 189*247fa90bSBarry Smith A->assembled = PETSC_FALSE; 1903a40ed3dSBarry Smith PetscFunctionReturn(0); 1912493cbb0SBarry Smith } 1922493cbb0SBarry Smith 1932cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 1942cd6534aSBarry Smith 195f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 196f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 1972cd6534aSBarry Smith 198d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) 199d71ae5a4SJacob Faibussowitsch { 2002cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */ 201b1d57f15SBarry Smith PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 202b1d57f15SBarry Smith PetscInt *r_rmapd, *r_rmapo; 2032cd6534aSBarry Smith 2042cd6534aSBarry Smith PetscFunctionBegin; 2059566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2069566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2079566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 2082cd6534aSBarry Smith nt = 0; 209992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 210992144d0SBarry Smith if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 2112cd6534aSBarry Smith nt++; 212992144d0SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 2132cd6534aSBarry Smith } 2142cd6534aSBarry Smith } 21508401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 217992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 218ad540459SPierre Jolivet if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 2192cd6534aSBarry Smith } 2209566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2219566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2222cd6534aSBarry Smith 2239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 224ad540459SPierre Jolivet for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 225992144d0SBarry Smith no = inA->rmap->mapping->n - nt; 2269566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 2272cd6534aSBarry Smith nt = 0; 228992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 229992144d0SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 2302cd6534aSBarry Smith nt++; 231992144d0SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 2322cd6534aSBarry Smith } 2332cd6534aSBarry Smith } 23408401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2359566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 237992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 238ad540459SPierre Jolivet if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 2392cd6534aSBarry Smith } 2409566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2419566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 2422cd6534aSBarry Smith PetscFunctionReturn(0); 2432cd6534aSBarry Smith } 2442cd6534aSBarry Smith 245d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A, Vec scale) 246d71ae5a4SJacob Faibussowitsch { 24792b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 24892b32695SKris Buschelman 24992b32695SKris Buschelman PetscFunctionBegin; 250cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 25192b32695SKris Buschelman PetscFunctionReturn(0); 25292b32695SKris Buschelman } 25392b32695SKris Buschelman 254d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) 255d71ae5a4SJacob Faibussowitsch { 2562cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */ 257b1d57f15SBarry Smith PetscInt n, i; 258bca11509SBarry Smith PetscScalar *d, *o; 259bca11509SBarry Smith const PetscScalar *s; 2602cd6534aSBarry Smith 2612cd6534aSBarry Smith PetscFunctionBegin; 26248a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale)); 2632cd6534aSBarry Smith 2649566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2652cd6534aSBarry Smith 2669566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2679566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 2689371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2699566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 2702cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 2719566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2722cd6534aSBarry Smith 2739566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2749566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 2759371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2769566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2779566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 2782cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2799566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 2802cd6534aSBarry Smith PetscFunctionReturn(0); 2812cd6534aSBarry Smith } 282