/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt        Nbs = sbaij->Nbs, i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(mbs, Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscTableDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
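
  /*
     What follows builds the SBAIJ-specific machinery for the symmetric multiply. As set up
     below, slvec0 and slvec1 have local length (mbs + ec)*bs: the leading mbs*bs entries line
     up with the locally owned block rows and the trailing ec*bs entries with the ec off-process
     block columns recorded in garray. The index sets "from" and "to" each carry an x half and
     a b half (2*ec blocks), so the single scatter sMvctx moves both groups at once;
     slvec1a/slvec1b and slvec0b are lightweight views into the two pieces of those arrays.
  */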
  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(0);
}

/*
  Takes the local part of an already assembled MPISBAIJ matrix
  and disassembles it. This is to allow new nonzeros into the matrix
  that require more communication in the matrix vector multiply.
  Thus certain data-structures must be rebuilt.

  Kind of slow! But that's what application programmers get when
  they are sloppy.
*/
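/*
   Roughly the inverse of the column compaction performed in MatSetUpMultiply_MPISBAIJ():
   B's block-column indices are mapped back to the global numbering through garray, and the
   ghost vectors and scatters are destroyed so that a later assembly can rebuild them.
*/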
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
  Mat           B = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}