xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1a30f8f8cSSatish Balay 
2a30f8f8cSSatish Balay /*
3a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
4a30f8f8cSSatish Balay */
5c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
67cff1425SSatish Balay 
7*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8*d71ae5a4SJacob Faibussowitsch {
940781036SHong Zhang   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
1040781036SHong Zhang   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
11a11b0bdaSJunchao Zhang   PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
12d0f46423SBarry Smith   PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
1340781036SHong Zhang   IS              from, to;
1440781036SHong Zhang   Vec             gvec;
15a11b0bdaSJunchao Zhang   PetscMPIInt     rank   = sbaij->rank, lsize;
16077aedafSJed Brown   PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
17077aedafSJed Brown   const PetscInt *sowners;
1840781036SHong Zhang   PetscScalar    *ptr;
19a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
20a11b0bdaSJunchao Zhang   PetscTable         gid1_lid1; /* one-based gid to lid table */
21a11b0bdaSJunchao Zhang   PetscTablePosition tpos;
22a11b0bdaSJunchao Zhang   PetscInt           gid, lid;
23a11b0bdaSJunchao Zhang #else
24a11b0bdaSJunchao Zhang   PetscInt *indices;
25a11b0bdaSJunchao Zhang #endif
2640781036SHong Zhang 
2740781036SHong Zhang   PetscFunctionBegin;
28a11b0bdaSJunchao Zhang #if defined(PETSC_USE_CTABLE)
299566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
30a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
31a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
32a11b0bdaSJunchao Zhang       PetscInt data, gid1 = aj[B->i[i] + j] + 1;
339566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
349566063dSJacob Faibussowitsch       if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
35a11b0bdaSJunchao Zhang     }
36a11b0bdaSJunchao Zhang   }
37a11b0bdaSJunchao Zhang   /* form array of columns we need */
389566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
399566063dSJacob Faibussowitsch   PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
40a11b0bdaSJunchao Zhang   while (tpos) {
419566063dSJacob Faibussowitsch     PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
429371c9d4SSatish Balay     gid--;
439371c9d4SSatish Balay     lid--;
44a11b0bdaSJunchao Zhang     garray[lid] = gid;
45a11b0bdaSJunchao Zhang   }
469566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(ec, garray));
479566063dSJacob Faibussowitsch   PetscCall(PetscTableRemoveAll(gid1_lid1));
489566063dSJacob Faibussowitsch   for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
49a11b0bdaSJunchao Zhang   /* compact out the extra columns in B */
50a11b0bdaSJunchao Zhang   for (i = 0; i < B->mbs; i++) {
51a11b0bdaSJunchao Zhang     for (j = 0; j < B->ilen[i]; j++) {
52a11b0bdaSJunchao Zhang       PetscInt gid1 = aj[B->i[i] + j] + 1;
539566063dSJacob Faibussowitsch       PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
54a11b0bdaSJunchao Zhang       lid--;
55a11b0bdaSJunchao Zhang       aj[B->i[i] + j] = lid;
56a11b0bdaSJunchao Zhang     }
57a11b0bdaSJunchao Zhang   }
589566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&gid1_lid1));
599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
60a11b0bdaSJunchao Zhang   for (i = j = 0; i < ec; i++) {
61a11b0bdaSJunchao Zhang     while (garray[i] >= owners[j + 1]) j++;
62a11b0bdaSJunchao Zhang     ec_owner[i] = j;
63a11b0bdaSJunchao Zhang   }
64a11b0bdaSJunchao Zhang #else
6540781036SHong Zhang   /* For the first stab we make an array as long as the number of columns */
6640781036SHong Zhang   /* mark those columns that are in sbaij->B */
679566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(Nbs, &indices));
6840781036SHong Zhang   for (i = 0; i < mbs; i++) {
6940781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) {
7040781036SHong Zhang       if (!indices[aj[B->i[i] + j]]) ec++;
7140781036SHong Zhang       indices[aj[B->i[i] + j]] = 1;
7240781036SHong Zhang     }
7340781036SHong Zhang   }
7440781036SHong Zhang 
7540781036SHong Zhang   /* form arrays of columns we need */
769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ec, &garray));
779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
7840781036SHong Zhang 
7940781036SHong Zhang   ec = 0;
80a11b0bdaSJunchao Zhang   for (j = 0; j < sbaij->size; j++) {
8140781036SHong Zhang     for (i = owners[j]; i < owners[j + 1]; i++) {
8240781036SHong Zhang       if (indices[i]) {
8340781036SHong Zhang         garray[ec]   = i;
8440781036SHong Zhang         ec_owner[ec] = j;
8540781036SHong Zhang         ec++;
8640781036SHong Zhang       }
8740781036SHong Zhang     }
8840781036SHong Zhang   }
8940781036SHong Zhang 
9040781036SHong Zhang   /* make indices now point into garray */
91b424e231SHong Zhang   for (i = 0; i < ec; i++) indices[garray[i]] = i;
9240781036SHong Zhang 
9340781036SHong Zhang   /* compact out the extra columns in B */
9440781036SHong Zhang   for (i = 0; i < mbs; i++) {
9540781036SHong Zhang     for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
9640781036SHong Zhang   }
979566063dSJacob Faibussowitsch   PetscCall(PetscFree(indices));
98a11b0bdaSJunchao Zhang #endif
9940781036SHong Zhang   B->nbs = ec;
1009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
1019566063dSJacob Faibussowitsch   PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));
10240781036SHong Zhang 
1039566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&sbaij->sMvctx));
10440781036SHong Zhang   /* create local vector that is used to scatter into */
1059566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
10640781036SHong Zhang 
10740781036SHong Zhang   /* create two temporary index sets for building scatter-gather */
1089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * ec, &stmp));
1099566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
11026fbe8dcSKarl Rupp   for (i = 0; i < ec; i++) stmp[i] = i;
1119566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));
11240781036SHong Zhang 
11393d1592dSHong Zhang   /* generate the scatter context
114bd096759SJunchao Zhang      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
1159566063dSJacob Faibussowitsch   PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
1169566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
1179566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&gvec));
11840781036SHong Zhang 
11940781036SHong Zhang   sbaij->garray = garray;
12026fbe8dcSKarl Rupp 
1219566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1229566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
12340781036SHong Zhang 
12440781036SHong Zhang   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
12540781036SHong Zhang   lsize = (mbs + ec) * bs;
1269566063dSJacob Faibussowitsch   PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
1279566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
1289566063dSJacob Faibussowitsch   PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
12940781036SHong Zhang 
1309566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
13140781036SHong Zhang 
13240781036SHong Zhang   /* x index in the IS sfrom */
13340781036SHong Zhang   for (i = 0; i < ec; i++) {
13440781036SHong Zhang     j          = ec_owner[i];
13540781036SHong Zhang     sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
13640781036SHong Zhang   }
13740781036SHong Zhang   /* b index in the IS sfrom */
13840781036SHong Zhang   k = sowners[rank] / bs + mbs;
13940781036SHong Zhang   for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
1409566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));
14140781036SHong Zhang 
14240781036SHong Zhang   /* x index in the IS sto */
14340781036SHong Zhang   k = sowners[rank] / bs + mbs;
144e82e9f6bSBarry Smith   for (i = 0; i < ec; i++) stmp[i] = (k + i);
14540781036SHong Zhang   /* b index in the IS sto */
146e82e9f6bSBarry Smith   for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];
14740781036SHong Zhang 
1489566063dSJacob Faibussowitsch   PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));
14940781036SHong Zhang 
1509566063dSJacob Faibussowitsch   PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
1519566063dSJacob Faibussowitsch   PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
15240781036SHong Zhang 
1539566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
1549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec1, &ptr));
1559566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
1569566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
1579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));
15840781036SHong Zhang 
1599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(sbaij->slvec0, &ptr));
1609566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
1619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
16240781036SHong Zhang 
1639566063dSJacob Faibussowitsch   PetscCall(PetscFree(stmp));
16440781036SHong Zhang 
1659566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&from));
1669566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&to));
1679566063dSJacob Faibussowitsch   PetscCall(PetscFree2(sgarray, ec_owner));
16840781036SHong Zhang   PetscFunctionReturn(0);
16940781036SHong Zhang }
17040781036SHong Zhang 
171a30f8f8cSSatish Balay /*
17201b2bd88SHong Zhang      Takes the local part of an already assembled MPISBAIJ matrix
173a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
174a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
175a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
176a30f8f8cSSatish Balay 
177a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
178a30f8f8cSSatish Balay    they are sloppy.
179a30f8f8cSSatish Balay */
180*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
181*d71ae5a4SJacob Faibussowitsch {
1822896befeSSatish Balay   Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
183a30f8f8cSSatish Balay   Mat           B     = baij->B, Bnew;
184a30f8f8cSSatish Balay   Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
185d0f46423SBarry Smith   PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
1864dfa11a4SJacob Faibussowitsch   PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
187a30f8f8cSSatish Balay   MatScalar    *a = Bbaij->a;
18887828ca2SBarry Smith   PetscScalar  *atmp;
189ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
19013f74950SBarry Smith   PetscInt l;
191a30f8f8cSSatish Balay #endif
192a30f8f8cSSatish Balay 
193a30f8f8cSSatish Balay   PetscFunctionBegin;
194ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
1959566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
19682502324SSatish Balay #endif
197a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
1989566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
1999566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
20001b2bd88SHong Zhang 
2019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
2029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
2039566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
2049566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
2059566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
20601b2bd88SHong Zhang 
207a30f8f8cSSatish Balay   if (baij->colmap) {
208a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2099566063dSJacob Faibussowitsch     PetscCall(PetscTableDestroy(&baij->colmap));
210a30f8f8cSSatish Balay #else
2119566063dSJacob Faibussowitsch     PetscCall(PetscFree(baij->colmap));
212a30f8f8cSSatish Balay #endif
213a30f8f8cSSatish Balay   }
214a30f8f8cSSatish Balay 
215a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
2169566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2179566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
218a30f8f8cSSatish Balay 
219a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
2209566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mbs, &nz));
221ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
2229566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
2239566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(Bnew, m, n, m, n));
2249566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
2259566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
2269566063dSJacob Faibussowitsch   PetscCall(PetscFree(nz));
227a30f8f8cSSatish Balay 
228b38c15b3SStefano Zampini   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
229b38c15b3SStefano Zampini     ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
230b38c15b3SStefano Zampini   }
231b38c15b3SStefano Zampini 
23277341eacSDmitry Karpeev   /*
23377341eacSDmitry Karpeev    Ensure that B's nonzerostate is monotonically increasing.
23477341eacSDmitry Karpeev    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
23577341eacSDmitry Karpeev    */
23677341eacSDmitry Karpeev   Bnew->nonzerostate = B->nonzerostate;
2379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &rvals));
238a30f8f8cSSatish Balay   for (i = 0; i < mbs; i++) {
239a30f8f8cSSatish Balay     rvals[0] = bs * i;
24026fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
241a30f8f8cSSatish Balay     for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
242a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]] * bs;
243a30f8f8cSSatish Balay       for (k = 0; k < bs; k++) {
244ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
245a30f8f8cSSatish Balay         for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
246a30f8f8cSSatish Balay #else
24771730473SSatish Balay         atmp = a + j * bs2 + k * bs;
248a30f8f8cSSatish Balay #endif
2499566063dSJacob Faibussowitsch         PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
250a30f8f8cSSatish Balay         col++;
251a30f8f8cSSatish Balay       }
252a30f8f8cSSatish Balay     }
253a30f8f8cSSatish Balay   }
254ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
2559566063dSJacob Faibussowitsch   PetscCall(PetscFree(atmp));
256a30f8f8cSSatish Balay #endif
2579566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
25826fbe8dcSKarl Rupp 
259f4259b30SLisandro Dalcin   baij->garray = NULL;
26026fbe8dcSKarl Rupp 
2619566063dSJacob Faibussowitsch   PetscCall(PetscFree(rvals));
2629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&B));
26326fbe8dcSKarl Rupp 
264a30f8f8cSSatish Balay   baij->B = Bnew;
26526fbe8dcSKarl Rupp 
266a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
267a30f8f8cSSatish Balay   PetscFunctionReturn(0);
268a30f8f8cSSatish Balay }
269