/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because we access the ISLocalToGlobalMapping data structure directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
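  /* garray is kept in baij->garray below (hence PETSC_COPY_VALUES above); stmp is handed to the
     IS created next with PETSC_OWN_POINTER, so it is freed when that IS is destroyed */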
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
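/*
   Illustrative sketch only (not code from this file): a typical application-level sequence that
   leads to the disassembly below. Here A is an assembled MPIBAIJ matrix and grow, gcol, and v
   are hypothetical; gcol lies in a block column owned by another process that is not yet present
   in the off-diagonal part B, so the scatter built in MatSetUpMultiply_MPIBAIJ() is stale and B
   must be rebuilt:

     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatSetValue(A, grow, gcol, v, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/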
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
  Mat          B     = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz, m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  PetscCall(VecDestroy(&baij->lvec));
  baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  if (Bbaij->nonew >= 0) { /* Inherit the nonew insertion option when it is non-negative */
    ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  for (i = 0; i < mbs; i++) {
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j * bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(MatDestroy(&B));

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access the private part of the matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(0);
}
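
/* Note (as the construction above and the use of uglyrmapd/uglyrmapo below suggest): the scale
   vector passed to MatMPIBAIJDiagonalScaleLocal() is apparently expected to be a sequential
   vector laid out in the local (including ghost) ordering of the matrix's ISLocalToGlobalMapping,
   with at least inA->rmap->mapping->n * bs entries; uglyrmapd and uglyrmapo pick out of it the
   entries corresponding to the "diagonal" (A) and "off-diagonal" (B) parts, respectively. */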

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access the private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into the dd vector */
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale the "diagonal" portion of the local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into the oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale the "off-diagonal" portion of the local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(0);
}