xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision eec179cf895b6fbcd6a0b58694319392b06e361b)
1a30f8f8cSSatish Balay 
2a30f8f8cSSatish Balay /*
3a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
4a30f8f8cSSatish Balay */
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
67cff1425SSatish Balay 
7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8d71ae5a4SJacob Faibussowitsch {
940781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
1040781036SHong Zhang   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
11*eec179cfSJacob Faibussowitsch   PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
12d0f46423SBarry Smith   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
1340781036SHong Zhang   IS              from, to;
1440781036SHong Zhang   Vec             gvec;
15a11b0bdaSJunchao Zhang   PetscMPIInt     rank   = sbaij->rank, lsize;
16077aedafSJed Brown   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
17077aedafSJed Brown   const PetscInt *sowners;
1840781036SHong Zhang   PetscScalar    *ptr;
19a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
20*eec179cfSJacob Faibussowitsch   PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
21*eec179cfSJacob Faibussowitsch   PetscHashIter tpos;
22a11b0bdaSJunchao Zhang   PetscInt      gid, lid;
23a11b0bdaSJunchao Zhang #else
24*eec179cfSJacob Faibussowitsch   PetscInt  Nbs = sbaij->Nbs;
25a11b0bdaSJunchao Zhang   PetscInt *indices;
26a11b0bdaSJunchao Zhang #endif
2740781036SHong Zhang 
2840781036SHong Zhang   PetscFunctionBegin;
29a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
30*eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
31a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
32a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
33a11b0bdaSJunchao Zhang       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
34*eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
35*eec179cfSJacob Faibussowitsch       if (!data) PetscCall(PetscHMapISetWithMode(gid1_lid1, gid1, ++ec, INSERT_VALUES));
36a11b0bdaSJunchao Zhang     }
37a11b0bdaSJunchao Zhang   }
38a11b0bdaSJunchao Zhang   /* form array of columns we need */
399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
40*eec179cfSJacob Faibussowitsch   PetscHashIterBegin(gid1_lid1, tpos);
41*eec179cfSJacob Faibussowitsch   while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
42*eec179cfSJacob Faibussowitsch     PetscHashIterGetKey(gid1_lid1, tpos, gid);
43*eec179cfSJacob Faibussowitsch     PetscHashIterGetVal(gid1_lid1, tpos, lid);
44*eec179cfSJacob Faibussowitsch     PetscHashIterNext(gid1_lid1, tpos);
459371c9d4SSatish Balay     gid--;
469371c9d4SSatish Balay     lid--;
47a11b0bdaSJunchao Zhang     garray[lid] = gid;
48a11b0bdaSJunchao Zhang   }
499566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
50*eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIClear(gid1_lid1));
51*eec179cfSJacob Faibussowitsch   for (i = 0; i < ec; i++) PetscCall(PetscHMapISetWithMode(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
52a11b0bdaSJunchao Zhang   /* compact out the extra columns in B */
53a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
54a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
55a11b0bdaSJunchao Zhang       PetscInt gid1 = aj[B->i[i] + j] + 1;
56*eec179cfSJacob Faibussowitsch       PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
57a11b0bdaSJunchao Zhang       lid--;
58a11b0bdaSJunchao Zhang       aj[B->i[i] + j] = lid;
59a11b0bdaSJunchao Zhang     }
60a11b0bdaSJunchao Zhang   }
61*eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&gid1_lid1));
629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
63a11b0bdaSJunchao Zhang   for (i = j = 0; i < ec; i++) {
64a11b0bdaSJunchao Zhang     while (garray[i] >= owners[j + 1]) j++;
65a11b0bdaSJunchao Zhang     ec_owner[i] = j;
66a11b0bdaSJunchao Zhang   }
67a11b0bdaSJunchao Zhang #else
6840781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
6940781036SHong Zhang   /* mark those columns that are in sbaij->B */
709566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
7140781036SHong Zhang   for (i = 0; i < mbs; i++) {
7240781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) {
7340781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
7440781036SHong Zhang       indices[aj[B->i[i] + j]] = 1;
7540781036SHong Zhang     }
7640781036SHong Zhang   }
7740781036SHong Zhang 
7840781036SHong Zhang   /* form arrays of columns we need */
799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
8140781036SHong Zhang 
8240781036SHong Zhang   ec = 0;
83a11b0bdaSJunchao Zhang   for (j = 0; j < sbaij->size; j++) {
8440781036SHong Zhang     for (i = owners[j]; i < owners[j + 1]; i++) {
8540781036SHong Zhang       if (indices[i]) {
8640781036SHong Zhang         garray[ec]   = i;
8740781036SHong Zhang         ec_owner[ec] = j;
8840781036SHong Zhang         ec++;
8940781036SHong Zhang       }
9040781036SHong Zhang     }
9140781036SHong Zhang   }
9240781036SHong Zhang 
9340781036SHong Zhang   /* make indices now point into garray */
94b424e231SHong Zhang   for (i = 0; i < ec; i++) indices[garray[i]] = i;
9540781036SHong Zhang 
9640781036SHong Zhang   /* compact out the extra columns in B */
9740781036SHong Zhang   for (i = 0; i < mbs; i++) {
9840781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
9940781036SHong Zhang   }
1009566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
101a11b0bdaSJunchao Zhang #endif
10240781036SHong Zhang   B->nbs = ec;
1039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
1049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
10540781036SHong Zhang 
1069566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sbaij->sMvctx));
10740781036SHong Zhang   /* create local vector that is used to scatter into */
1089566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
10940781036SHong Zhang 
11040781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
1119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * ec, &stmp));
1129566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
11326fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1149566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
11540781036SHong Zhang 
11693d1592dSHong Zhang   /* generate the scatter context
117bd096759SJunchao Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
1189566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1199566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
1209566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
12140781036SHong Zhang 
12240781036SHong Zhang   sbaij->garray = garray;
12326fbe8dcSKarl Rupp 
1249566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1259566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
12640781036SHong Zhang 
12740781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
12840781036SHong Zhang   lsize = (mbs + ec) * bs;
1299566063dSJacob Faibussowitsch   PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
1309566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
1319566063dSJacob Faibussowitsch   PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
13240781036SHong Zhang 
1339566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
13440781036SHong Zhang 
13540781036SHong Zhang   /* x index in the IS sfrom */
13640781036SHong Zhang   for (i = 0; i < ec; i++) {
13740781036SHong Zhang     j          = ec_owner[i];
13840781036SHong Zhang     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
13940781036SHong Zhang   }
14040781036SHong Zhang   /* b index in the IS sfrom */
14140781036SHong Zhang   k = sowners[rank] / bs + mbs;
14240781036SHong Zhang   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
1439566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
14440781036SHong Zhang 
14540781036SHong Zhang   /* x index in the IS sto */
14640781036SHong Zhang   k = sowners[rank] / bs + mbs;
147e82e9f6bSBarry Smith   for (i = 0; i < ec; i++) stmp[i] = (k + i);
14840781036SHong Zhang   /* b index in the IS sto */
149e82e9f6bSBarry Smith   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
15040781036SHong Zhang 
1519566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
15240781036SHong Zhang 
1539566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
1549566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
15540781036SHong Zhang 
1569566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
1579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec1, &ptr));
1589566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
1599566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
1609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
16140781036SHong Zhang 
1629566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec0, &ptr));
1639566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
1649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
16540781036SHong Zhang 
1669566063dSJacob Faibussowitsch   PetscCall(PetscFree(stmp));
16740781036SHong Zhang 
1689566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1699566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1709566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sgarray, ec_owner));
17140781036SHong Zhang   PetscFunctionReturn(0);
17240781036SHong Zhang }
17340781036SHong Zhang 
174a30f8f8cSSatish Balay /*
17501b2bd88SHong Zhang      Takes the local part of an already assembled MPISBAIJ matrix
176a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
177a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
178a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
179a30f8f8cSSatish Balay 
180a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
181a30f8f8cSSatish Balay    they are sloppy.
182a30f8f8cSSatish Balay */
183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
184d71ae5a4SJacob Faibussowitsch {
1852896befeSSatish Balay   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
186a30f8f8cSSatish Balay   Mat           B     = baij->B, Bnew;
187a30f8f8cSSatish Balay   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
188d0f46423SBarry Smith   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
1894dfa11a4SJacob Faibussowitsch   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
190a30f8f8cSSatish Balay   MatScalar    *a = Bbaij->a;
19187828ca2SBarry Smith   PetscScalar  *atmp;
192ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
19313f74950SBarry Smith   PetscInt l;
194a30f8f8cSSatish Balay #endif
195a30f8f8cSSatish Balay 
196a30f8f8cSSatish Balay   PetscFunctionBegin;
197ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
1989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
19982502324SSatish Balay #endif
200a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
2019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
2029566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
20301b2bd88SHong Zhang 
2049566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
2059566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
2069566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
2079566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
2089566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
20901b2bd88SHong Zhang 
210a30f8f8cSSatish Balay   if (baij->colmap) {
211a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
212*eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDestroy(&baij->colmap));
213a30f8f8cSSatish Balay #else
2149566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
215a30f8f8cSSatish Balay #endif
216a30f8f8cSSatish Balay   }
217a30f8f8cSSatish Balay 
218a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
2199566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
221a30f8f8cSSatish Balay 
222a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
2239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &nz));
224ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
2259566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
2269566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, m, n, m, n));
2279566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
2289566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
2299566063dSJacob Faibussowitsch   PetscCall(PetscFree(nz));
230a30f8f8cSSatish Balay 
231b38c15b3SStefano Zampini   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
232b38c15b3SStefano Zampini     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
233b38c15b3SStefano Zampini   }
234b38c15b3SStefano Zampini 
23577341eacSDmitry Karpeev   /*
23677341eacSDmitry Karpeev    Ensure that B's nonzerostate is monotonically increasing.
23777341eacSDmitry Karpeev    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
23877341eacSDmitry Karpeev    */
23977341eacSDmitry Karpeev   Bnew->nonzerostate = B->nonzerostate;
2409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &rvals));
241a30f8f8cSSatish Balay   for (i = 0; i < mbs; i++) {
242a30f8f8cSSatish Balay     rvals[0] = bs * i;
24326fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
244a30f8f8cSSatish Balay     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
245a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]] * bs;
246a30f8f8cSSatish Balay       for (k = 0; k < bs; k++) {
247ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
248a30f8f8cSSatish Balay         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
249a30f8f8cSSatish Balay #else
25071730473SSatish Balay         atmp = a + j * bs2 + k * bs;
251a30f8f8cSSatish Balay #endif
2529566063dSJacob Faibussowitsch         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
253a30f8f8cSSatish Balay         col++;
254a30f8f8cSSatish Balay       }
255a30f8f8cSSatish Balay     }
256a30f8f8cSSatish Balay   }
257ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
2589566063dSJacob Faibussowitsch   PetscCall(PetscFree(atmp));
259a30f8f8cSSatish Balay #endif
2609566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
26126fbe8dcSKarl Rupp 
262f4259b30SLisandro Dalcin   baij->garray = NULL;
26326fbe8dcSKarl Rupp 
2649566063dSJacob Faibussowitsch   PetscCall(PetscFree(rvals));
2659566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
26626fbe8dcSKarl Rupp 
267a30f8f8cSSatish Balay   baij->B = Bnew;
26826fbe8dcSKarl Rupp 
269a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
270a30f8f8cSSatish Balay   PetscFunctionReturn(0);
271a30f8f8cSSatish Balay }
272