/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscTableDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
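  /*
     garray now lists, in increasing order, the global block columns this
     process needs from other processes, and after the compaction above B->j
     holds local indices 0..ec-1 into that list.  A sketch (not from the
     source): if garray = {3, 7, 12} and bs = 2, the block IS "from" built
     below addresses global vector entries {6,7, 14,15, 24,25}, which the
     scatter gathers into lvec[0..5].
  */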
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->sMvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec0b));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1a));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sbaij->slvec1b));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));
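  /*
     Summary of what was just built (a reading of the code above, not a comment
     from the original source): on each process slvec0 and slvec1 both have the
     layout [ local part (mbs*bs entries) | ghost part (ec*bs entries) ].  The
     single scatter sMvctx : slvec0 -> slvec1 does double duty for the symmetric
     multiply: one forward scatter with ADD_VALUES, as used by MatMult_MPISBAIJ(),
     gathers the remote x values this process needs into slvec1's ghost slots
     (slvec1b) and simultaneously adds the B^T x contributions computed here
     (staged in slvec0's ghost slots, slvec0b) into the owning processes' local
     parts (slvec1a), since only the upper-triangular blocks of A are stored.
  */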
  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
  Mat           B    = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, ec, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -Bbaij->nbs * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
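  /*
     The loop below copies every stored block of the old B into Bnew, mapping
     each compacted local block column Bbaij->j[j] back to its global block
     column garray[Bbaij->j[j]].  Afterwards Bnew is indexed by global columns
     again, i.e. the matrix is back in its unassembled state; each bs x bs
     block is inserted one bs-long column at a time.
  */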
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        /* copy column k of block j, converting MatScalar to PetscScalar
           (the k*bs offset matches the pointer arithmetic in the #else branch) */
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
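/*
   Note (not from the original source): because MatDisAssemble_MPISBAIJ() clears
   was_assembled, the next final assembly of the matrix is expected to call
   MatSetUpMultiply_MPISBAIJ() again, rebuilding garray, lvec, slvec0/slvec1 and
   both scatters so the communication pattern covers the newly inserted
   off-diagonal nonzeros that triggered the disassembly.
*/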