xref: /petsc/src/mat/impls/aij/mpi/mmaij.c (revision eb14dd146a89a5ef91eac0ce18ce74e06f74fd1f)
18c79f6d3SBarry Smith /*
28c79f6d3SBarry Smith    Support for the parallel AIJ matrix vector multiply
38c79f6d3SBarry Smith */
4c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
5aeda0f58SHong Zhang #include <petsc/private/vecimpl.h>
6af0996ceSBarry Smith #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
78c79f6d3SBarry Smith 
8d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
9d71ae5a4SJacob Faibussowitsch {
1044a69424SLois Curfman McInnes   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
11f4f49eeaSPierre Jolivet   Mat_SeqAIJ *B   = (Mat_SeqAIJ *)aij->B->data;
12b3c64f9dSJunchao Zhang   PetscInt    i, j, *aj = B->j, *garray;
13b3c64f9dSJunchao Zhang   PetscInt    ec = 0; /* Number of nonzero external columns */
141eb62cbbSBarry Smith   IS          from, to;
151eb62cbbSBarry Smith   Vec         gvec;
16aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
17eec179cfSJacob Faibussowitsch   PetscHMapI    gid1_lid1 = NULL;
18eec179cfSJacob Faibussowitsch   PetscHashIter tpos;
19b1d57f15SBarry Smith   PetscInt      gid, lid;
206f531f54SSatish Balay #else
21d0f46423SBarry Smith   PetscInt N = mat->cmap->N, *indices;
222066d6f7SSatish Balay #endif
232066d6f7SSatish Balay 
243a40ed3dSBarry Smith   PetscFunctionBegin;
254b8d542aSHong Zhang   if (!aij->garray) {
2628b400f6SJacob Faibussowitsch     PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
27aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
28c5bfad50SMark F. Adams     /* use a table */
29eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapICreateWithSize(aij->B->rmap->n, &gid1_lid1));
30d0f46423SBarry Smith     for (i = 0; i < aij->B->rmap->n; i++) {
312066d6f7SSatish Balay       for (j = 0; j < B->ilen[i]; j++) {
32b1d57f15SBarry Smith         PetscInt data, gid1 = aj[B->i[i] + j] + 1;
33eec179cfSJacob Faibussowitsch         PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
34fa46199cSSatish Balay         if (!data) {
352066d6f7SSatish Balay           /* one based table */
36c76ffc5fSJacob Faibussowitsch           PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
372066d6f7SSatish Balay         }
382066d6f7SSatish Balay       }
392066d6f7SSatish Balay     }
402066d6f7SSatish Balay     /* form array of columns we need */
419566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ec, &garray));
42eec179cfSJacob Faibussowitsch     PetscHashIterBegin(gid1_lid1, tpos);
43eec179cfSJacob Faibussowitsch     while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
44eec179cfSJacob Faibussowitsch       PetscHashIterGetKey(gid1_lid1, tpos, gid);
45eec179cfSJacob Faibussowitsch       PetscHashIterGetVal(gid1_lid1, tpos, lid);
46eec179cfSJacob Faibussowitsch       PetscHashIterNext(gid1_lid1, tpos);
47b0a32e0cSBarry Smith       gid--;
48b0a32e0cSBarry Smith       lid--;
492066d6f7SSatish Balay       garray[lid] = gid;
502066d6f7SSatish Balay     }
519566063dSJacob Faibussowitsch     PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
52eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIClear(gid1_lid1));
53c76ffc5fSJacob Faibussowitsch     for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
542066d6f7SSatish Balay     /* compact out the extra columns in B */
55d0f46423SBarry Smith     for (i = 0; i < aij->B->rmap->n; i++) {
562066d6f7SSatish Balay       for (j = 0; j < B->ilen[i]; j++) {
57b1d57f15SBarry Smith         PetscInt gid1 = aj[B->i[i] + j] + 1;
58eec179cfSJacob Faibussowitsch         PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
59fa46199cSSatish Balay         lid--;
60b3fb0a6cSMatthew Knepley         aj[B->i[i] + j] = lid;
612066d6f7SSatish Balay       }
622066d6f7SSatish Balay     }
639566063dSJacob Faibussowitsch     PetscCall(PetscLayoutDestroy(&aij->B->cmap));
649566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
65eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&gid1_lid1));
662066d6f7SSatish Balay #else
6711285404SBarry Smith     /* Make an array as long as the number of columns */
681eb62cbbSBarry Smith     /* mark those columns that are in aij->B */
699566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(N, &indices));
70d0f46423SBarry Smith     for (i = 0; i < aij->B->rmap->n; i++) {
71d6dfbf8fSBarry Smith       for (j = 0; j < B->ilen[i]; j++) {
72bfec09a0SHong Zhang         if (!indices[aj[B->i[i] + j]]) ec++;
73bfec09a0SHong Zhang         indices[aj[B->i[i] + j]] = 1;
74416022c9SBarry Smith       }
751eb62cbbSBarry Smith     }
768c79f6d3SBarry Smith 
771eb62cbbSBarry Smith     /* form array of columns we need */
789566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ec, &garray));
791eb62cbbSBarry Smith     ec = 0;
801eb62cbbSBarry Smith     for (i = 0; i < N; i++) {
811eb62cbbSBarry Smith       if (indices[i]) garray[ec++] = i;
821eb62cbbSBarry Smith     }
831eb62cbbSBarry Smith 
841eb62cbbSBarry Smith     /* make indices now point into garray */
85ad540459SPierre Jolivet     for (i = 0; i < ec; i++) indices[garray[i]] = i;
861eb62cbbSBarry Smith 
871eb62cbbSBarry Smith     /* compact out the extra columns in B */
88d0f46423SBarry Smith     for (i = 0; i < aij->B->rmap->n; i++) {
89ad540459SPierre Jolivet       for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
90d6dfbf8fSBarry Smith     }
919566063dSJacob Faibussowitsch     PetscCall(PetscLayoutDestroy(&aij->B->cmap));
929566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
939566063dSJacob Faibussowitsch     PetscCall(PetscFree(indices));
942066d6f7SSatish Balay #endif
954b8d542aSHong Zhang   } else {
964b8d542aSHong Zhang     garray = aij->garray;
974b8d542aSHong Zhang   }
984b8d542aSHong Zhang 
994b8d542aSHong Zhang   if (!aij->lvec) {
10028b400f6SJacob Faibussowitsch     PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
1019566063dSJacob Faibussowitsch     PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
1024b8d542aSHong Zhang   }
1039566063dSJacob Faibussowitsch   PetscCall(VecGetSize(aij->lvec, &ec));
1041eb62cbbSBarry Smith 
105d6dfbf8fSBarry Smith   /* create two temporary Index sets for build scatter gather */
1069566063dSJacob Faibussowitsch   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
1079566063dSJacob Faibussowitsch   PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));
1081eb62cbbSBarry Smith 
1091eb62cbbSBarry Smith   /* create temporary global vector to generate scatter context */
110b5eb4454SBarry Smith   /* This does not allocate the array's memory so is efficient */
1119566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1121eb62cbbSBarry Smith 
1132d336d48SLois Curfman McInnes   /* generate the scatter context */
1149566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&aij->Mvctx));
1159566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
1169566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
11767bb5161SHong Zhang   aij->garray = garray;
1184b8d542aSHong Zhang 
1199566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1209566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1219566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
1223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1238c79f6d3SBarry Smith }
1249e25ed09SBarry Smith 
125fff043a9SJunchao Zhang /* Disassemble the off-diag portion of the MPIAIJXxx matrix.
126fff043a9SJunchao Zhang    In other words, change the B from reduced format using local col ids
127fff043a9SJunchao Zhang    to expanded format using global col ids, which will make it easier to
128fff043a9SJunchao Zhang    insert new nonzeros (with global col ids) into the matrix.
129fff043a9SJunchao Zhang    The off-diag B determines communication in the matrix vector multiply.
130*eb14dd14SAlex Lindsay    use_preallocation determines whether the use the preallocation or
131*eb14dd14SAlex Lindsay    existing non-zero structure when creating the global form of B
1322493cbb0SBarry Smith */
133*eb14dd14SAlex Lindsay PetscErrorCode MatDisAssemble_MPIAIJ(Mat A, PetscBool use_preallocation)
134d71ae5a4SJacob Faibussowitsch {
1352493cbb0SBarry Smith   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
1367c612885SStefano Zampini   Mat         B = aij->B, Bnew = NULL;
1372493cbb0SBarry Smith 
1383a40ed3dSBarry Smith   PetscFunctionBegin;
1392493cbb0SBarry Smith   /* free stuff related to matrix-vec multiply */
1409566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&aij->lvec));
141464493b3SBarry Smith   if (aij->colmap) {
142aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
143eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&aij->colmap));
1442066d6f7SSatish Balay #else
1459566063dSJacob Faibussowitsch     PetscCall(PetscFree(aij->colmap));
1462066d6f7SSatish Balay #endif
147464493b3SBarry Smith   }
1482493cbb0SBarry Smith 
1497c612885SStefano Zampini   if (B) {
1507c612885SStefano Zampini     Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
1517c612885SStefano Zampini     PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz;
1527c612885SStefano Zampini     PetscScalar        v;
1537c612885SStefano Zampini     const PetscScalar *ba;
1547c612885SStefano Zampini 
1552493cbb0SBarry Smith     /* make sure that B is assembled so we can access its values */
1569566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1579566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1582493cbb0SBarry Smith 
1592493cbb0SBarry Smith     /* invent new B and copy stuff over */
1609566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m + 1, &nz));
161*eb14dd14SAlex Lindsay     if (use_preallocation)
162*eb14dd14SAlex Lindsay       for (i = 0; i < m; i++) nz[i] = Baij->ipre[i];
163*eb14dd14SAlex Lindsay     else
164ad540459SPierre Jolivet       for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
1659566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
1669566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its col size */
1679566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
1689566063dSJacob Faibussowitsch     PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
1699566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));
1702205254eSKarl Rupp 
171b38c15b3SStefano Zampini     if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
172b38c15b3SStefano Zampini       ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
173b38c15b3SStefano Zampini     }
174b38c15b3SStefano Zampini 
17577341eacSDmitry Karpeev     /*
17677341eacSDmitry Karpeev      Ensure that B's nonzerostate is monotonically increasing.
17777341eacSDmitry Karpeev      Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
17877341eacSDmitry Karpeev      */
179f69fde56SShane Stafford     Bnew->nonzerostate = B->nonzerostate;
1802205254eSKarl Rupp 
1819566063dSJacob Faibussowitsch     PetscCall(PetscFree(nz));
1829566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJGetArrayRead(B, &ba));
1832493cbb0SBarry Smith     for (i = 0; i < m; i++) {
184bfec09a0SHong Zhang       for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
185bfec09a0SHong Zhang         col = garray[Baij->j[ct]];
186fff043a9SJunchao Zhang         v   = ba[ct++];
1879566063dSJacob Faibussowitsch         PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
1882493cbb0SBarry Smith       }
1892493cbb0SBarry Smith     }
1909566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
1919566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
1927c612885SStefano Zampini   }
1937c612885SStefano Zampini   PetscCall(PetscFree(aij->garray));
1942205254eSKarl Rupp 
1952493cbb0SBarry Smith   aij->B           = Bnew;
196227d817aSBarry Smith   A->was_assembled = PETSC_FALSE;
197247fa90bSBarry Smith   A->assembled     = PETSC_FALSE;
1983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1992493cbb0SBarry Smith }
2002493cbb0SBarry Smith 
2012cd6534aSBarry Smith /*      ugly stuff added for Glenn someday we should fix this up */
2022cd6534aSBarry Smith 
203f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
204f4259b30SLisandro Dalcin static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
2052cd6534aSBarry Smith 
206ba38deedSJacob Faibussowitsch static PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
207d71ae5a4SJacob Faibussowitsch {
2082cd6534aSBarry Smith   Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /*access private part of matrix */
209b1d57f15SBarry Smith   PetscInt    i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
210b1d57f15SBarry Smith   PetscInt   *r_rmapd, *r_rmapo;
2112cd6534aSBarry Smith 
2122cd6534aSBarry Smith   PetscFunctionBegin;
2139566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
2149566063dSJacob Faibussowitsch   PetscCall(MatGetSize(ina->A, NULL, &n));
2159566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
2162cd6534aSBarry Smith   nt = 0;
217992144d0SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
218992144d0SBarry Smith     if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
2192cd6534aSBarry Smith       nt++;
220992144d0SBarry Smith       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
2212cd6534aSBarry Smith     }
2222cd6534aSBarry Smith   }
22308401ef6SPierre Jolivet   PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
2249566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
225992144d0SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
226ad540459SPierre Jolivet     if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
2272cd6534aSBarry Smith   }
2289566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapd));
2299566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
2302cd6534aSBarry Smith 
2319566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
232ad540459SPierre Jolivet   for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
233992144d0SBarry Smith   no = inA->rmap->mapping->n - nt;
2349566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
2352cd6534aSBarry Smith   nt = 0;
236992144d0SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
237992144d0SBarry Smith     if (lindices[inA->rmap->mapping->indices[i]]) {
2382cd6534aSBarry Smith       nt++;
239992144d0SBarry Smith       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
2402cd6534aSBarry Smith     }
2412cd6534aSBarry Smith   }
24208401ef6SPierre Jolivet   PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n);
2439566063dSJacob Faibussowitsch   PetscCall(PetscFree(lindices));
2449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
245992144d0SBarry Smith   for (i = 0; i < inA->rmap->mapping->n; i++) {
246ad540459SPierre Jolivet     if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i;
2472cd6534aSBarry Smith   }
2489566063dSJacob Faibussowitsch   PetscCall(PetscFree(r_rmapo));
2499566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
2503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2512cd6534aSBarry Smith }
2522cd6534aSBarry Smith 
253d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale)
254d71ae5a4SJacob Faibussowitsch {
2552cd6534aSBarry Smith   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /*access private part of matrix */
256b1d57f15SBarry Smith   PetscInt           n, i;
257bca11509SBarry Smith   PetscScalar       *d, *o;
258bca11509SBarry Smith   const PetscScalar *s;
2592cd6534aSBarry Smith 
2602cd6534aSBarry Smith   PetscFunctionBegin;
26148a46eb9SPierre Jolivet   if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));
2622cd6534aSBarry Smith 
2639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(scale, &s));
2642cd6534aSBarry Smith 
2659566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglydd, &n));
2669566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglydd, &d));
2679371c9d4SSatish Balay   for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
2689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglydd, &d));
2692cd6534aSBarry Smith   /* column scale "diagonal" portion of local matrix */
2709566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
2712cd6534aSBarry Smith 
2729566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(auglyoo, &n));
2739566063dSJacob Faibussowitsch   PetscCall(VecGetArray(auglyoo, &o));
2749371c9d4SSatish Balay   for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
2759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(scale, &s));
2769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(auglyoo, &o));
2772cd6534aSBarry Smith   /* column scale "off-diagonal" portion of local matrix */
2789566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
2793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2802cd6534aSBarry Smith }
281