/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because we access the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); }
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) { garray[ec++] = i; }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) { indices[garray[i]] = i; }

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) { aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)baij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)baij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  baij->garray = garray;

  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
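
/*
   Usage sketch (illustrative, not part of this file): the scatter baij->Mvctx built above is what
   the MPIBAIJ MatMult() uses to gather the ghost (off-process) block columns of the input vector
   into baij->lvec, roughly as (vector names xx, yy illustrative):

     PetscCall(VecScatterBegin(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*baij->A->ops->mult)(baij->A, xx, yy));                // diagonal block times owned part
     PetscCall(VecScatterEnd(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*baij->B->ops->multadd)(baij->B, baij->lvec, yy, yy)); // off-diagonal block times ghost values

   garray[] records, for each compacted local block column of baij->B, the global block column it
   came from; e.g. if this process's off-diagonal part references global block columns {3, 7, 12},
   then ec = 3, garray = {3, 7, 12}, and the entries of B->j become {0, 1, 2}.
*/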

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) {
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
  Mat          B     = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz, ec, m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec));
  baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -Bbaij->nbs * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) { nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  for (i = 0; i < mbs; i++) {
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j * bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) { uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; }
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) { lindices[garray[i]] = i + 1; }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) { uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; }
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) {
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}

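/*
   Usage sketch (illustrative, not part of this file): A must have a local-to-global mapping set
   (e.g. via MatSetLocalToGlobalMapping()), and `scale` is a sequential vector ordered by that
   local numbering (length = number of local block indices in the mapping times the block size).
   A caller would then apply the local column scaling as

     PetscCall(MatMPIBAIJDiagonalScaleLocal(A, scale));

   On first use this dispatches to MatDiagonalScaleLocal_MPIBAIJ() below, which triggers
   MatMPIBAIJDiagonalScaleLocalSetUp(): uglyrmapd[k] (resp. uglyrmapo[k]) records the position in
   `scale` from which entry k of the "diagonal" (resp. "off-diagonal") work vector is copied.
*/
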
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) {
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) { PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); }

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(0);
}
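
/*
   Note on the scaling above (illustrative): MatDiagonalScale(M, NULL, r) forms M <- M * diag(r),
   so column k of the "diagonal" block a->A is multiplied by uglydd[k] = scale[uglyrmapd[k]], and
   column k of the "off-diagonal" block a->B is multiplied by uglyoo[k] = scale[uglyrmapo[k]].
*/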