1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8 { 9 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(sbaij->B->data); 11 PetscInt Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray; 12 PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt; 13 IS from, to; 14 Vec gvec; 15 PetscMPIInt rank = sbaij->rank, lsize; 16 PetscInt *owners = sbaij->rangebs, *ec_owner, k; 17 const PetscInt *sowners; 18 PetscScalar *ptr; 19 #if defined(PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; /* one-based gid to lid table */ 21 PetscTablePosition tpos; 22 PetscInt gid, lid; 23 #else 24 PetscInt *indices; 25 #endif 26 27 PetscFunctionBegin; 28 #if defined(PETSC_USE_CTABLE) 29 PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1)); 30 for (i = 0; i < B->mbs; i++) { 31 for (j = 0; j < B->ilen[i]; j++) { 32 PetscInt data, gid1 = aj[B->i[i] + j] + 1; 33 PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 34 if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 35 } 36 } 37 /* form array of columns we need */ 38 PetscCall(PetscMalloc1(ec, &garray)); 39 PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 40 while (tpos) { 41 PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 42 gid--; 43 lid--; 44 garray[lid] = gid; 45 } 46 PetscCall(PetscSortInt(ec, garray)); 47 PetscCall(PetscTableRemoveAll(gid1_lid1)); 48 for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 49 /* compact out the extra columns in B */ 50 for (i = 0; i < B->mbs; i++) { 51 for (j = 0; j < B->ilen[i]; j++) { 52 PetscInt gid1 = aj[B->i[i] + j] + 1; 53 PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 54 lid--; 55 aj[B->i[i] + j] = lid; 56 } 57 } 58 PetscCall(PetscTableDestroy(&gid1_lid1)); 59 PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 60 for (i = j = 0; i < ec; i++) { 61 while (garray[i] >= owners[j + 1]) j++; 62 ec_owner[i] = j; 63 } 64 #else 65 /* For the first stab we make an array as long as the number of columns */ 66 /* mark those columns that are in sbaij->B */ 67 PetscCall(PetscCalloc1(Nbs, &indices)); 68 for (i = 0; i < mbs; i++) { 69 for (j = 0; j < B->ilen[i]; j++) { 70 if (!indices[aj[B->i[i] + j]]) ec++; 71 indices[aj[B->i[i] + j]] = 1; 72 } 73 } 74 75 /* form arrays of columns we need */ 76 PetscCall(PetscMalloc1(ec, &garray)); 77 PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner)); 78 79 ec = 0; 80 for (j = 0; j < sbaij->size; j++) { 81 for (i = owners[j]; i < owners[j + 1]; i++) { 82 if (indices[i]) { 83 garray[ec] = i; 84 ec_owner[ec] = j; 85 ec++; 86 } 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i = 0; i < ec; i++) indices[garray[i]] = i; 92 93 /* compact out the extra columns in B */ 94 for (i = 0; i < mbs; i++) { 95 for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 96 } 97 PetscCall(PetscFree(indices)); 98 #endif 99 B->nbs = ec; 100 PetscCall(PetscLayoutDestroy(&sbaij->B->cmap)); 101 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap)); 102 103 PetscCall(VecScatterDestroy(&sbaij->sMvctx)); 104 /* create local vector that is used to scatter into */ 105 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec)); 106 107 /* create two temporary index sets for building scatter-gather */ 108 PetscCall(PetscMalloc1(2 * ec, &stmp)); 109 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 110 for (i = 0; i < ec; i++) stmp[i] = i; 111 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to)); 112 113 /* generate the scatter context 114 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */ 115 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 116 PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx)); 117 PetscCall(VecDestroy(&gvec)); 118 119 sbaij->garray = garray; 120 121 PetscCall(ISDestroy(&from)); 122 PetscCall(ISDestroy(&to)); 123 124 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 125 lsize = (mbs + ec) * bs; 126 PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0)); 127 PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1)); 128 PetscCall(VecGetSize(sbaij->slvec0, &vec_size)); 129 130 PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners)); 131 132 /* x index in the IS sfrom */ 133 for (i = 0; i < ec; i++) { 134 j = ec_owner[i]; 135 sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]); 136 } 137 /* b index in the IS sfrom */ 138 k = sowners[rank] / bs + mbs; 139 for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j; 140 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from)); 141 142 /* x index in the IS sto */ 143 k = sowners[rank] / bs + mbs; 144 for (i = 0; i < ec; i++) stmp[i] = (k + i); 145 /* b index in the IS sto */ 146 for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec]; 147 148 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to)); 149 150 PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx)); 151 PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 152 153 PetscCall(VecGetLocalSize(sbaij->slvec1, &nt)); 154 PetscCall(VecGetArray(sbaij->slvec1, &ptr)); 155 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a)); 156 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b)); 157 PetscCall(VecRestoreArray(sbaij->slvec1, &ptr)); 158 159 PetscCall(VecGetArray(sbaij->slvec0, &ptr)); 160 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b)); 161 PetscCall(VecRestoreArray(sbaij->slvec0, &ptr)); 162 163 PetscCall(PetscFree(stmp)); 164 165 PetscCall(ISDestroy(&from)); 166 PetscCall(ISDestroy(&to)); 167 PetscCall(PetscFree2(sgarray, ec_owner)); 168 PetscFunctionReturn(0); 169 } 170 171 /* 172 Takes the local part of an already assembled MPISBAIJ matrix 173 and disassembles it. This is to allow new nonzeros into the matrix 174 that require more communication in the matrix vector multiply. 175 Thus certain data-structures must be rebuilt. 176 177 Kind of slow! But that's what application programmers get when 178 they are sloppy. 179 */ 180 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 181 { 182 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data; 183 Mat B = baij->B, Bnew; 184 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 185 PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 186 PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n; 187 MatScalar *a = Bbaij->a; 188 PetscScalar *atmp; 189 #if defined(PETSC_USE_REAL_MAT_SINGLE) 190 PetscInt l; 191 #endif 192 193 PetscFunctionBegin; 194 #if defined(PETSC_USE_REAL_MAT_SINGLE) 195 PetscCall(PetscMalloc1(A->rmap->bs, &atmp)); 196 #endif 197 /* free stuff related to matrix-vec multiply */ 198 PetscCall(VecDestroy(&baij->lvec)); 199 PetscCall(VecScatterDestroy(&baij->Mvctx)); 200 201 PetscCall(VecDestroy(&baij->slvec0)); 202 PetscCall(VecDestroy(&baij->slvec0b)); 203 PetscCall(VecDestroy(&baij->slvec1)); 204 PetscCall(VecDestroy(&baij->slvec1a)); 205 PetscCall(VecDestroy(&baij->slvec1b)); 206 207 if (baij->colmap) { 208 #if defined(PETSC_USE_CTABLE) 209 PetscCall(PetscTableDestroy(&baij->colmap)); 210 #else 211 PetscCall(PetscFree(baij->colmap)); 212 #endif 213 } 214 215 /* make sure that B is assembled so we can access its values */ 216 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 217 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 218 219 /* invent new B and copy stuff over */ 220 PetscCall(PetscMalloc1(mbs, &nz)); 221 for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 222 PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 223 PetscCall(MatSetSizes(Bnew, m, n, m, n)); 224 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 225 PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 226 PetscCall(PetscFree(nz)); 227 228 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 229 ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 230 } 231 232 /* 233 Ensure that B's nonzerostate is monotonically increasing. 234 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 235 */ 236 Bnew->nonzerostate = B->nonzerostate; 237 PetscCall(PetscMalloc1(bs, &rvals)); 238 for (i = 0; i < mbs; i++) { 239 rvals[0] = bs * i; 240 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 241 for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 242 col = garray[Bbaij->j[j]] * bs; 243 for (k = 0; k < bs; k++) { 244 #if defined(PETSC_USE_REAL_MAT_SINGLE) 245 for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l]; 246 #else 247 atmp = a + j * bs2 + k * bs; 248 #endif 249 PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode)); 250 col++; 251 } 252 } 253 } 254 #if defined(PETSC_USE_REAL_MAT_SINGLE) 255 PetscCall(PetscFree(atmp)); 256 #endif 257 PetscCall(PetscFree(baij->garray)); 258 259 baij->garray = NULL; 260 261 PetscCall(PetscFree(rvals)); 262 PetscCall(MatDestroy(&B)); 263 264 baij->B = Bnew; 265 266 A->was_assembled = PETSC_FALSE; 267 PetscFunctionReturn(0); 268 } 269