1be1d678aSKris Buschelman 28c79f6d3SBarry Smith /* 38c79f6d3SBarry Smith Support for the parallel AIJ matrix vector multiply 48c79f6d3SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 6aeda0f58SHong Zhang #include <petsc/private/vecimpl.h> 7af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 88c79f6d3SBarry Smith 99371c9d4SSatish Balay PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) { 1044a69424SLois Curfman McInnes Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 11ec8511deSBarry Smith Mat_SeqAIJ *B = (Mat_SeqAIJ *)(aij->B->data); 12b3c64f9dSJunchao Zhang PetscInt i, j, *aj = B->j, *garray; 13b3c64f9dSJunchao Zhang PetscInt ec = 0; /* Number of nonzero external columns */ 141eb62cbbSBarry Smith IS from, to; 151eb62cbbSBarry Smith Vec gvec; 16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 170f5bd95cSBarry Smith PetscTable gid1_lid1; 180f5bd95cSBarry Smith PetscTablePosition tpos; 19b1d57f15SBarry Smith PetscInt gid, lid; 206f531f54SSatish Balay #else 21d0f46423SBarry Smith PetscInt N = mat->cmap->N, *indices; 222066d6f7SSatish Balay #endif 232066d6f7SSatish Balay 243a40ed3dSBarry Smith PetscFunctionBegin; 254b8d542aSHong Zhang if (!aij->garray) { 2628b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 27aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 28c5bfad50SMark F. Adams /* use a table */ 299566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(aij->B->rmap->n, mat->cmap->N + 1, &gid1_lid1)); 30d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 312066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 32b1d57f15SBarry Smith PetscInt data, gid1 = aj[B->i[i] + j] + 1; 339566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 34fa46199cSSatish Balay if (!data) { 352066d6f7SSatish Balay /* one based table */ 369566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 372066d6f7SSatish Balay } 382066d6f7SSatish Balay } 392066d6f7SSatish Balay } 402066d6f7SSatish Balay /* form array of columns we need */ 419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 429566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 432066d6f7SSatish Balay while (tpos) { 449566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 45b0a32e0cSBarry Smith gid--; 46b0a32e0cSBarry Smith lid--; 472066d6f7SSatish Balay garray[lid] = gid; 482066d6f7SSatish Balay } 499566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 509566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 51*48a46eb9SPierre Jolivet for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 522066d6f7SSatish Balay /* compact out the extra columns in B */ 53d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 542066d6f7SSatish Balay for (j = 0; j < B->ilen[i]; j++) { 55b1d57f15SBarry Smith PetscInt gid1 = aj[B->i[i] + j] + 1; 569566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 57fa46199cSSatish Balay lid--; 58b3fb0a6cSMatthew Knepley aj[B->i[i] + j] = lid; 592066d6f7SSatish Balay } 602066d6f7SSatish Balay } 619566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 629566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 639566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 642066d6f7SSatish Balay #else 6511285404SBarry Smith /* Make an array as long as the number of columns */ 661eb62cbbSBarry Smith /* mark those columns that are in aij->B */ 679566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &indices)); 68d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 69d6dfbf8fSBarry Smith for (j = 0; j < B->ilen[i]; j++) { 70bfec09a0SHong Zhang if (!indices[aj[B->i[i] + j]]) ec++; 71bfec09a0SHong Zhang indices[aj[B->i[i] + j]] = 1; 72416022c9SBarry Smith } 731eb62cbbSBarry Smith } 748c79f6d3SBarry Smith 751eb62cbbSBarry Smith /* form array of columns we need */ 769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec, &garray)); 771eb62cbbSBarry Smith ec = 0; 781eb62cbbSBarry Smith for (i = 0; i < N; i++) { 791eb62cbbSBarry Smith if (indices[i]) garray[ec++] = i; 801eb62cbbSBarry Smith } 811eb62cbbSBarry Smith 821eb62cbbSBarry Smith /* make indices now point into garray */ 839371c9d4SSatish Balay for (i = 0; i < ec; i++) { indices[garray[i]] = i; } 841eb62cbbSBarry Smith 851eb62cbbSBarry Smith /* compact out the extra columns in B */ 86d0f46423SBarry Smith for (i = 0; i < aij->B->rmap->n; i++) { 879371c9d4SSatish Balay for (j = 0; j < B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; } 88d6dfbf8fSBarry Smith } 899566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&aij->B->cmap)); 909566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap)); 919566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 922066d6f7SSatish Balay #endif 934b8d542aSHong Zhang } else { 944b8d542aSHong Zhang garray = aij->garray; 954b8d542aSHong Zhang } 964b8d542aSHong Zhang 974b8d542aSHong Zhang if (!aij->lvec) { 9828b400f6SJacob Faibussowitsch PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat"); 999566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL)); 1004b8d542aSHong Zhang } 1019566063dSJacob Faibussowitsch PetscCall(VecGetSize(aij->lvec, &ec)); 1021eb62cbbSBarry Smith 103d6dfbf8fSBarry Smith /* create two temporary Index sets for build scatter gather */ 1049566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 1059566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 1061eb62cbbSBarry Smith 1071eb62cbbSBarry Smith /* create temporary global vector to generate scatter context */ 108b5eb4454SBarry Smith /* This does not allocate the array's memory so is efficient */ 1099566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 1101eb62cbbSBarry Smith 1112d336d48SLois Curfman McInnes /* generate the scatter context */ 1129566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&aij->Mvctx)); 1139566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx)); 1149566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 1159566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->Mvctx)); 1169566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->lvec)); 1179566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt))); 11867bb5161SHong Zhang aij->garray = garray; 1194b8d542aSHong Zhang 1209566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from)); 1219566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to)); 1222205254eSKarl Rupp 1239566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 1249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 1259566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 1263a40ed3dSBarry Smith PetscFunctionReturn(0); 1278c79f6d3SBarry Smith } 1289e25ed09SBarry Smith 129fff043a9SJunchao Zhang /* Disassemble the off-diag portion of the MPIAIJXxx matrix. 130fff043a9SJunchao Zhang In other words, change the B from reduced format using local col ids 131fff043a9SJunchao Zhang to expanded format using global col ids, which will make it easier to 132fff043a9SJunchao Zhang insert new nonzeros (with global col ids) into the matrix. 133fff043a9SJunchao Zhang The off-diag B determines communication in the matrix vector multiply. 1342493cbb0SBarry Smith */ 1359371c9d4SSatish Balay PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) { 1362493cbb0SBarry Smith Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 1372493cbb0SBarry Smith Mat B = aij->B, Bnew; 138ec8511deSBarry Smith Mat_SeqAIJ *Baij = (Mat_SeqAIJ *)B->data; 139d0f46423SBarry Smith PetscInt i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz, ec; 14087828ca2SBarry Smith PetscScalar v; 141fff043a9SJunchao Zhang const PetscScalar *ba; 1422493cbb0SBarry Smith 1433a40ed3dSBarry Smith PetscFunctionBegin; 1442493cbb0SBarry Smith /* free stuff related to matrix-vec multiply */ 1459566063dSJacob Faibussowitsch PetscCall(VecGetSize(aij->lvec, &ec)); /* needed for PetscLogObjectMemory below */ 1469566063dSJacob Faibussowitsch PetscCall(VecDestroy(&aij->lvec)); 147464493b3SBarry Smith if (aij->colmap) { 148aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 1499566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&aij->colmap)); 1502066d6f7SSatish Balay #else 1519566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->colmap)); 1529566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -aij->B->cmap->n * sizeof(PetscInt))); 1532066d6f7SSatish Balay #endif 154464493b3SBarry Smith } 1552493cbb0SBarry Smith 1562493cbb0SBarry Smith /* make sure that B is assembled so we can access its values */ 1579566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1589566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1592493cbb0SBarry Smith 1602493cbb0SBarry Smith /* invent new B and copy stuff over */ 1619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &nz)); 1629371c9d4SSatish Balay for (i = 0; i < m; i++) { nz[i] = Baij->i[i + 1] - Baij->i[i]; } 1639566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 1649566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */ 1659566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 1669566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 1679566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz)); 1682205254eSKarl Rupp 169b38c15b3SStefano Zampini if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 170b38c15b3SStefano Zampini ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew; 171b38c15b3SStefano Zampini } 172b38c15b3SStefano Zampini 17377341eacSDmitry Karpeev /* 17477341eacSDmitry Karpeev Ensure that B's nonzerostate is monotonically increasing. 17577341eacSDmitry Karpeev Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 17677341eacSDmitry Karpeev */ 177f69fde56SShane Stafford Bnew->nonzerostate = B->nonzerostate; 1782205254eSKarl Rupp 1799566063dSJacob Faibussowitsch PetscCall(PetscFree(nz)); 1809566063dSJacob Faibussowitsch PetscCall(MatSeqAIJGetArrayRead(B, &ba)); 1812493cbb0SBarry Smith for (i = 0; i < m; i++) { 182bfec09a0SHong Zhang for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) { 183bfec09a0SHong Zhang col = garray[Baij->j[ct]]; 184fff043a9SJunchao Zhang v = ba[ct++]; 1859566063dSJacob Faibussowitsch PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode)); 1862493cbb0SBarry Smith } 1872493cbb0SBarry Smith } 1889566063dSJacob Faibussowitsch PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 189fff043a9SJunchao Zhang 1909566063dSJacob Faibussowitsch PetscCall(PetscFree(aij->garray)); 1919566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt))); 1929566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 1939566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew)); 1942205254eSKarl Rupp 1952493cbb0SBarry Smith aij->B = Bnew; 196227d817aSBarry Smith A->was_assembled = PETSC_FALSE; 1973a40ed3dSBarry Smith PetscFunctionReturn(0); 1982493cbb0SBarry Smith } 1992493cbb0SBarry Smith 2002cd6534aSBarry Smith /* ugly stuff added for Glenn someday we should fix this up */ 2012cd6534aSBarry Smith 202f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 203f4259b30SLisandro Dalcin static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 2042cd6534aSBarry Smith 2059371c9d4SSatish Balay PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) { 2062cd6534aSBarry Smith Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */ 207b1d57f15SBarry Smith PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 208b1d57f15SBarry Smith PetscInt *r_rmapd, *r_rmapo; 2092cd6534aSBarry Smith 2102cd6534aSBarry Smith PetscFunctionBegin; 2119566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 2129566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A, NULL, &n)); 2139566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 2142cd6534aSBarry Smith nt = 0; 215992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 216992144d0SBarry Smith if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 2172cd6534aSBarry Smith nt++; 218992144d0SBarry Smith r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 2192cd6534aSBarry Smith } 2202cd6534aSBarry Smith } 22108401ef6SPierre Jolivet PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 2229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 223992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 2249371c9d4SSatish Balay if (r_rmapd[i]) { auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; } 2252cd6534aSBarry Smith } 2269566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2279566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 2282cd6534aSBarry Smith 2299566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 2309371c9d4SSatish Balay for (i = 0; i < ina->B->cmap->n; i++) { lindices[garray[i]] = i + 1; } 231992144d0SBarry Smith no = inA->rmap->mapping->n - nt; 2329566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 2332cd6534aSBarry Smith nt = 0; 234992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 235992144d0SBarry Smith if (lindices[inA->rmap->mapping->indices[i]]) { 2362cd6534aSBarry Smith nt++; 237992144d0SBarry Smith r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 2382cd6534aSBarry Smith } 2392cd6534aSBarry Smith } 24008401ef6SPierre Jolivet PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 2419566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 243992144d0SBarry Smith for (i = 0; i < inA->rmap->mapping->n; i++) { 2449371c9d4SSatish Balay if (r_rmapo[i]) { auglyrmapo[(r_rmapo[i] - 1)] = i; } 2452cd6534aSBarry Smith } 2469566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2479566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 2482cd6534aSBarry Smith PetscFunctionReturn(0); 2492cd6534aSBarry Smith } 2502cd6534aSBarry Smith 2519371c9d4SSatish Balay PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A, Vec scale) { 25292b32695SKris Buschelman /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 25392b32695SKris Buschelman 25492b32695SKris Buschelman PetscFunctionBegin; 255cac4c232SBarry Smith PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 25692b32695SKris Buschelman PetscFunctionReturn(0); 25792b32695SKris Buschelman } 25892b32695SKris Buschelman 2599371c9d4SSatish Balay PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) { 2602cd6534aSBarry Smith Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */ 261b1d57f15SBarry Smith PetscInt n, i; 262bca11509SBarry Smith PetscScalar *d, *o; 263bca11509SBarry Smith const PetscScalar *s; 2642cd6534aSBarry Smith 2652cd6534aSBarry Smith PetscFunctionBegin; 266*48a46eb9SPierre Jolivet if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale)); 2672cd6534aSBarry Smith 2689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale, &s)); 2692cd6534aSBarry Smith 2709566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd, &n)); 2719566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd, &d)); 2729371c9d4SSatish Balay for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 2739566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd, &d)); 2742cd6534aSBarry Smith /* column scale "diagonal" portion of local matrix */ 2759566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 2762cd6534aSBarry Smith 2779566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo, &n)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo, &o)); 2799371c9d4SSatish Balay for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 2809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale, &s)); 2819566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo, &o)); 2822cd6534aSBarry Smith /* column scale "off-diagonal" portion of local matrix */ 2839566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 2842cd6534aSBarry Smith PetscFunctionReturn(0); 2852cd6534aSBarry Smith } 286