/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
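  /*
     Layout sketch: slvec0 and slvec1 each hold the bs*mbs locally owned entries followed by
     bs*ec ghost entries, so a single scatter (sMvctx) can exchange both the off-process x
     values and the off-process product contributions that arise because only the upper
     triangular blocks are stored; slvec1a/slvec1b/slvec0b created below are sequential views
     of those two pieces.
  */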
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
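/*
   Concretely, B's compacted local column indices are mapped back to global block columns via
   garray while the values are copied into a freshly preallocated Bnew; garray and the scatter
   vectors are then discarded so they can be rebuilt during the next assembly.
*/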
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}